[Mesa-dev] [PATCH] i965: Support GL_CLAMP natively on Broadwell.

2014-05-29 Thread Kenneth Graunke
The new hardware actually supports this OpenGL 1.x feature natively,
so we can finally drop our shader workarounds.

Not many applications use GL_CLAMP, and most use it unintentionally, but
it's trivial to do right, so we should.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
---
 src/mesa/drivers/dri/i965/brw_defines.h  |  1 +
 src/mesa/drivers/dri/i965/brw_wm.c   |  3 ++-
 src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 13 ++---
 3 files changed, 13 insertions(+), 4 deletions(-)

No Piglit regressions on Broadwell.

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index c38e447..8996b02 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -606,6 +606,7 @@
 #define BRW_TEXCOORDMODE_CUBE3
 #define BRW_TEXCOORDMODE_CLAMP_BORDER4
 #define BRW_TEXCOORDMODE_MIRROR_ONCE 5
+#define GEN8_TEXCOORDMODE_HALF_BORDER6
 
 #define BRW_THREAD_PRIORITY_NORMAL   0
 #define BRW_THREAD_PRIORITY_HIGH 1
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index c2d38b3..d716e6f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -347,7 +347,8 @@ brw_populate_sampler_prog_key_data(struct gl_context *ctx,
  if (alpha_depth || (brw->gen < 8 && !brw->is_haswell))
 key->swizzles[s] = brw_get_texture_swizzle(ctx, t);
 
-if (sampler->MinFilter != GL_NEAREST &&
+if (brw->gen < 8 &&
+ sampler->MinFilter != GL_NEAREST &&
 sampler->MagFilter != GL_NEAREST) {
 if (sampler->WrapS == GL_CLAMP)
   key->gl_clamp_mask[0] |= 1 << s;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index 6f8dde4..8a4bfea 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -55,9 +55,16 @@ translate_wrap_mode(struct brw_context *brw, GLenum wrap, 
bool using_nearest)
   /* GL_CLAMP is the weird mode where coordinates are clamped to
* [0.0, 1.0], so linear filtering of coordinates outside of
* [0.0, 1.0] give you half edge texel value and half border
-   * color.  The fragment shader will clamp the coordinates, and
-   * we set clamp_border here, which gets the result desired.  We
-   * just use clamp(_to_edge) for nearest, because for nearest
+   * color.
+   *
+   * Gen8+ supports this natively.
+   */
+  if (brw->gen >= 8)
+ return GEN8_TEXCOORDMODE_HALF_BORDER;
+
+  /* On Gen4-7.5, we clamp the coordinates in the fragment shader
+   * and set clamp_border here, which gets the result desired.
+   * We just use clamp(_to_edge) for nearest, because for nearest
* clamping to 1.0 gives border color instead of the desired
* edge texels.
*/
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0.5/1] i965: Pass brw to translate_wrap_mode().

2014-05-29 Thread Kenneth Graunke
This lets us do generation checks.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
---
 src/mesa/drivers/dri/i965/brw_state.h| 3 ++-
 src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 8 
 src/mesa/drivers/dri/i965/gen7_sampler_state.c   | 6 +++---
 3 files changed, 9 insertions(+), 8 deletions(-)

Sorry, neglected to send out this patch - it goes before
i965: Support GL_CLAMP natively on Broadwell.
to make it actually compile.

diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index dbcf7c7..c52a977 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -243,7 +243,8 @@ void gen7_upload_3dstate_so_decl_list(struct brw_context 
*brw,
 void gen8_init_vtable_surface_functions(struct brw_context *brw);
 
 /* brw_wm_sampler_state.c */
-uint32_t translate_wrap_mode(GLenum wrap, bool using_nearest);
+uint32_t translate_wrap_mode(struct brw_context *brw,
+ GLenum wrap, bool using_nearest);
 void upload_default_color(struct brw_context *brw,
  struct gl_sampler_object *sampler,
  int unit,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index bbfd9e0..6f8dde4 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -46,7 +46,7 @@
 
 
 uint32_t
-translate_wrap_mode(GLenum wrap, bool using_nearest)
+translate_wrap_mode(struct brw_context *brw, GLenum wrap, bool using_nearest)
 {
switch( wrap ) {
case GL_REPEAT:
@@ -276,11 +276,11 @@ static void brw_update_sampler_state(struct brw_context 
*brw,
   }
}
 
-   sampler-ss1.r_wrap_mode = translate_wrap_mode(gl_sampler-WrapR,
+   sampler-ss1.r_wrap_mode = translate_wrap_mode(brw, gl_sampler-WrapR,
  using_nearest);
-   sampler-ss1.s_wrap_mode = translate_wrap_mode(gl_sampler-WrapS,
+   sampler-ss1.s_wrap_mode = translate_wrap_mode(brw, gl_sampler-WrapS,
  using_nearest);
-   sampler-ss1.t_wrap_mode = translate_wrap_mode(gl_sampler-WrapT,
+   sampler-ss1.t_wrap_mode = translate_wrap_mode(brw, gl_sampler-WrapT,
  using_nearest);
 
   if (brw->gen >= 6 &&
diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c 
b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
index 74d5e9e..6077ff2 100644
--- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
@@ -103,11 +103,11 @@ gen7_update_sampler_state(struct brw_context *brw, int 
unit, int ss_index,
   }
}
 
-   sampler-ss3.r_wrap_mode = translate_wrap_mode(gl_sampler-WrapR,
+   sampler-ss3.r_wrap_mode = translate_wrap_mode(brw, gl_sampler-WrapR,
  using_nearest);
-   sampler-ss3.s_wrap_mode = translate_wrap_mode(gl_sampler-WrapS,
+   sampler-ss3.s_wrap_mode = translate_wrap_mode(brw, gl_sampler-WrapS,
  using_nearest);
-   sampler-ss3.t_wrap_mode = translate_wrap_mode(gl_sampler-WrapT,
+   sampler-ss3.t_wrap_mode = translate_wrap_mode(brw, gl_sampler-WrapT,
  using_nearest);
 
/* Cube-maps on 965 and later must use the same wrap mode for all 3
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965/fs: Drop extra MOVs for MATH operands on Broadwell.

2014-05-29 Thread Kenneth Graunke
Broadwell can handle MATH on immediates, and can also handle source
modifiers.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
Cc: 10.2 mesa-sta...@lists.freedesktop.org
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 5605db3..8548488 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1334,6 +1334,10 @@ fs_visitor::emit_sampleid_setup(ir_variable *ir)
 fs_reg
 fs_visitor::fix_math_operand(fs_reg src)
 {
+   /* Gen8+ just works. */
+   if (brw->gen >= 8)
+  return src;
+
/* Can't do hstride == 0 args on gen6 math, so expand it out. We
 * might be able to do better by doing execsize = 1 math and then
 * expanding that result out, but we would need to be careful with
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965/vec4: Drop extra MOVs for MATH operands on Broadwell.

2014-05-29 Thread Kenneth Graunke
Broadwell can handle MATH on immediates, and can also handle source
modifiers.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
Cc: 10.2 mesa-sta...@lists.freedesktop.org
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 54a40dd..ec288b7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -301,6 +301,10 @@ vec4_visitor::fix_3src_operand(src_reg src)
 src_reg
 vec4_visitor::fix_math_operand(src_reg src)
 {
+   /* Gen8+ just works. */
+   if (brw->gen >= 8)
+  return src;
+
/* The gen6 math instruction ignores the source modifiers --
 * swizzle, abs, negate, and at least some parts of the register
 * region description.
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Don't emit SURFACE_STATEs for gather workarounds on Broadwell.

2014-05-29 Thread Kenneth Graunke
As far as I can tell, Broadwell doesn't need any of the SURFACE_STATE
workarounds for textureGather() bugs, so there's no need to emit
a second set of identical copies.

To keep things simple, just point the gather surface index base to the
same place as the texture surface index base.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
Cc: 10.2 mesa-sta...@lists.freedesktop.org
---
 src/mesa/drivers/dri/i965/brw_shader.cpp |  9 +++--
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 14 --
 2 files changed, 15 insertions(+), 8 deletions(-)

No Piglit regressions on Broadwell with -t texture.

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index f4f1334..7fae416 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -740,8 +740,13 @@ 
backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table
}
 
if (prog-UsesGather) {
-  stage_prog_data-binding_table.gather_texture_start = 
next_binding_table_offset;
-  next_binding_table_offset += num_textures;
+  if (brw->gen >= 8) {
+ stage_prog_data->binding_table.gather_texture_start =
+stage_prog_data->binding_table.texture_start;
+  } else {
+ stage_prog_data->binding_table.gather_texture_start = 
next_binding_table_offset;
+ next_binding_table_offset += num_textures;
+  }
} else {
   stage_prog_data-binding_table.gather_texture_start = 0xd0d0d0d0;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index c9d9614..3279d3b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -829,12 +829,14 @@ brw_update_texture_surfaces(struct brw_context *brw)
/* emit alternate set of surface state for gather. this
 * allows the surface format to be overriden for only the
 * gather4 messages. */
-   if (vs  vs-UsesGather)
-  update_stage_texture_surfaces(brw, vs, brw-vs.base, true);
-   if (gs  gs-UsesGather)
-  update_stage_texture_surfaces(brw, gs, brw-gs.base, true);
-   if (fs  fs-UsesGather)
-  update_stage_texture_surfaces(brw, fs, brw-wm.base, true);
+   if (brw->gen < 8) {
+  if (vs && vs->UsesGather)
+ update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
+  if (gs && gs->UsesGather)
+ update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
+  if (fs && fs->UsesGather)
+ update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
+   }
 
brw-state.dirty.brw |= BRW_NEW_SURFACES;
 }
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] i965: Don't emit SURFACE_STATEs for gather workarounds on Broadwell.

2014-05-29 Thread Chris Forbes
Did this test run include forcing ARB_gpu_shader5 on?

On Thu, May 29, 2014 at 7:06 PM, Kenneth Graunke kenn...@whitecape.org wrote:
 As far as I can tell, Broadwell doesn't need any of the SURFACE_STATE
 workarounds for textureGather() bugs, so there's no need to emit
 a second set of identical copies.

 To keep things simple, just point the gather surface index base to the
 same place as the texture surface index base.

 Signed-off-by: Kenneth Graunke kenn...@whitecape.org
 Cc: 10.2 mesa-sta...@lists.freedesktop.org
 ---
  src/mesa/drivers/dri/i965/brw_shader.cpp |  9 +++--
  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 14 --
  2 files changed, 15 insertions(+), 8 deletions(-)

 No Piglit regressions on Broadwell with -t texture.

 diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
 b/src/mesa/drivers/dri/i965/brw_shader.cpp
 index f4f1334..7fae416 100644
 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
 @@ -740,8 +740,13 @@ 
 backend_visitor::assign_common_binding_table_offsets(uint32_t 
 next_binding_table
 }

 if (prog-UsesGather) {
 -  stage_prog_data-binding_table.gather_texture_start = 
 next_binding_table_offset;
 -  next_binding_table_offset += num_textures;
 +  if (brw-gen = 8) {
 + stage_prog_data-binding_table.gather_texture_start =
 +stage_prog_data-binding_table.texture_start;
 +  } else {
 + stage_prog_data-binding_table.gather_texture_start = 
 next_binding_table_offset;
 + next_binding_table_offset += num_textures;
 +  }
 } else {
stage_prog_data-binding_table.gather_texture_start = 0xd0d0d0d0;
 }
 diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
 b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
 index c9d9614..3279d3b 100644
 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
 +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
 @@ -829,12 +829,14 @@ brw_update_texture_surfaces(struct brw_context *brw)
 /* emit alternate set of surface state for gather. this
  * allows the surface format to be overriden for only the
  * gather4 messages. */
 -   if (vs  vs-UsesGather)
 -  update_stage_texture_surfaces(brw, vs, brw-vs.base, true);
 -   if (gs  gs-UsesGather)
 -  update_stage_texture_surfaces(brw, gs, brw-gs.base, true);
 -   if (fs  fs-UsesGather)
 -  update_stage_texture_surfaces(brw, fs, brw-wm.base, true);
 +   if (brw-gen  8) {
 +  if (vs  vs-UsesGather)
 + update_stage_texture_surfaces(brw, vs, brw-vs.base, true);
 +  if (gs  gs-UsesGather)
 + update_stage_texture_surfaces(brw, gs, brw-gs.base, true);
 +  if (fs  fs-UsesGather)
 + update_stage_texture_surfaces(brw, fs, brw-wm.base, true);
 +   }

 brw-state.dirty.brw |= BRW_NEW_SURFACES;
  }
 --
 1.9.1

 ___
 mesa-stable mailing list
 mesa-sta...@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-stable
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glx/dri3: request async pixmap present for swap interval 0

2014-05-29 Thread Frank Binns

On 24/04/14 13:19, Frank Binns wrote:

Typically we want swaps to occur during vertical blank to avoid
tearing. However, when the swap interval is 0 we want to go
as quickly as possible even if this results in tearing.

Indicate to the X server that it should attempt to do an
asynchronous swap when the swap interval is 0.

Signed-off-by: Frank Binns frank.bi...@imgtec.com
---
  src/glx/dri3_glx.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index b7dac8e..e385723 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -1321,6 +1321,7 @@ dri3_swap_buffers(__GLXDRIdrawable *pdraw, int64_t 
target_msc, int64_t divisor,
 xcb_connection_t *c = XGetXCBConnection(dpy);
 int buf_id = DRI3_BACK_ID(priv-cur_back);
 int64_t ret = 0;
+   uint32_t options = XCB_PRESENT_OPTION_NONE;
  
 unsigned flags = __DRI2_FLUSH_DRAWABLE;

 if (flush)
@@ -1339,6 +1340,9 @@ dri3_swap_buffers(__GLXDRIdrawable *pdraw, int64_t 
target_msc, int64_t divisor,
if (target_msc == 0)
   target_msc = priv-msc + priv-swap_interval * (priv-send_sbc - 
priv-recv_sbc);
  
+  if (priv-swap_interval == 0)

+ options |= XCB_PRESENT_OPTION_ASYNC;
+
priv-buffers[buf_id]-busy = 1;
priv-buffers[buf_id]-last_swap = priv-send_sbc;
xcb_present_pixmap(c,
@@ -1352,7 +1356,7 @@ dri3_swap_buffers(__GLXDRIdrawable *pdraw, int64_t 
target_msc, int64_t divisor,
   None, /* target_crtc 
*/
   None,
   priv-buffers[buf_id]-sync_fence,
- XCB_PRESENT_OPTION_NONE,
+ options,
   target_msc,
   divisor,
   remainder, 0, NULL);


Ping
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965: Add runtime checks for line antialiasing in Gen < 6.

2014-05-29 Thread Iago Toral
Hi Kenneth,

On Wed, 2014-05-28 at 12:42 -0700, Kenneth Graunke wrote:
 On 05/27/2014 03:50 AM, Iago Toral Quiroga wrote:
  In Gen < 6 the hardware generates a runtime bit that indicates whether AA 
  data
  has to be sent as part of the framebuffer write SEND message. This affects 
  the
  specific case where we have setup antialiased line rendering and we render
  polygons which have one face setup in GL_LINE mode (line antialiasing
  will be used) and the other one in GL_FILL mode (no line antialiasing 
  needed).
  
  Currently we are not doing this runtime test and instead we always send AA
  data, which produces incorrect rendering of the GL_FILL face of the polygon 
  in the aforementioned scenario (verified in ironlake and gm45).
  
  In Gen4 this is, likely, a regression introduced with commit 098acf6c843. In
  Gen5 this has never worked properly. Gen > 5 are not affected by this.
  
  The patch fixes the problem by adding the appropriate runtime check and
  adjusting the framebuffer write message accordingly in the conflictive
  scenario (detected with fs_visitor::runtime_check_aads_emit == TRUE).
  
  Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=78679
  ---
   src/mesa/drivers/dri/i965/brw_fs.h   |  4 ++
   src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 86 
  +---
   2 files changed, 58 insertions(+), 32 deletions(-)
  
  diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
  b/src/mesa/drivers/dri/i965/brw_fs.h
  index 60a4906..ab8912f 100644
  --- a/src/mesa/drivers/dri/i965/brw_fs.h
  +++ b/src/mesa/drivers/dri/i965/brw_fs.h
  @@ -452,6 +452,10 @@ public:
   
  void emit_color_write(int target, int index, int first_color_mrf);
  void emit_alpha_test();
  +   void do_emit_fb_write(int target, int base_mrf, int mlen, bool eot,
  + bool header_present);
  +   void emit_fb_write(int target, int base_mrf, int mlen, bool eot,
  +  bool header_present);
  void emit_fb_writes();
   
  void emit_shader_time_begin();
  diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
  b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
  index 171f063..4c3897b 100644
  --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
  +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
  @@ -2731,6 +2731,54 @@ fs_visitor::emit_alpha_test()
   }
   
   void
  +fs_visitor::do_emit_fb_write(int target, int base_mrf, int mlen, bool eot,
  + bool header_present)
  +{
  +   fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
  +   inst-target = target;
  +   inst-base_mrf = base_mrf;
  +   inst-mlen = mlen;
  +   inst-eot = eot;
  +   inst-header_present = header_present;
  +   if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) {
  +  inst-predicate = BRW_PREDICATE_NORMAL;
  +  inst-flag_subreg = 1;
  +   }
  +}
  +
  +void
  +fs_visitor::emit_fb_write(int target, int base_mrf, int mlen, bool eot,
  +  bool header_present)
  +{
  +   if (!runtime_check_aads_emit) {
  +  do_emit_fb_write(target, base_mrf, mlen, eot, header_present);
  +   } else {
  +  /* This can only happen in Gen < 6
  +   */
  +  fs_reg reg_tmp_ud = fs_reg(this, glsl_type::uint_type);
  +  emit(AND(reg_tmp_ud,
  +   fs_reg(get_element_ud(brw_vec8_grf(1,0), 6)),
 
 I think
 
 retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD)
 
 might be a little clearer than:
 
 get_element_ud(brw_vec8_grf(1,0), 6))
 
 since it just refers to r1.6 right away, rather than r1.0 modified to
 have a suboffset of 6.

Sure, that looks better.

  +   fs_reg(brw_imm_ud(126;
  +  emit(CMP(reg_null_ud,
  +   reg_tmp_ud,
  +   fs_reg(brw_imm_ud(0)),
  +   BRW_CONDITIONAL_Z));
 
 You can actually generate a flag condition directly from the AND
 instruction, and eliminate the CMP:
 
 fs_inst *inst =
emit(AND(reg_null_ud,
 fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD),
 fs_reg(0)));
 inst-conditional_mod = BRW_CONDITIONAL_Z;
 
 (you might have to use vec1(retype(brw_null_reg), BRW_REGISTER_TYPE_UD),
 rather than reg_null_ud.)

Oh, that's much nicer! We also get rid of the temporary register with
this.

I think you messed up the parameters to the AND though. I believe this
is what you meant:

fs_inst *inst =
emit(AND(vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)),
 fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD)),
 fs_reg(brw_imm_ud(1 << 26))));
inst->conditional_mod = BRW_CONDITIONAL_Z;

  +  emit(IF(BRW_PREDICATE_NORMAL));
  +  {
  + /* Shift message header one register since we are not sending
  +  * AA data stored in base_mrf+2
  +  */
  + do_emit_fb_write(target, base_mrf + 1, mlen - 1, eot, 
  header_present);
  +  }
  +  emit(BRW_OPCODE_ELSE);
  +  {
  + do_emit_fb_write(target, base_mrf, mlen, eot, header_present);
  

Re: [Mesa-dev] [PATCH V2 06/11] loader: Use drirc device_id parameter in complement to DRI_PRIME

2014-05-29 Thread Michel Dänzer
On 29.05.2014 12:22, Axel Davy wrote:
 DRI_PRIME is not very handy, because you have to launch the executable
 with it set, which is not always easy to do.
 By using drirc, the user specifies the target executable
 and the device to use. After that the program will be launched everytime
 on the target device.
 
 v2: fix compilation issue

Still doesn't build here (on top of
http://lists.freedesktop.org/archives/mesa-dev/2014-May/060312.html
to fix today's build failure), see below. This fixes it, but I'm not
sure it's the proper fix:

diff --git a/src/egl/main/Makefile.am b/src/egl/main/Makefile.am
index 38758a0..12dc42f 100644
--- a/src/egl/main/Makefile.am
+++ b/src/egl/main/Makefile.am
@@ -110,7 +110,7 @@ if HAVE_EGL_DRIVER_DRI2
 AM_CFLAGS += -D_EGL_BUILT_IN_DRIVER_DRI2
 AM_CFLAGS += -DHAVE_XCB_DRI2
 libEGL_la_LIBADD += ../drivers/dri2/libegl_dri2.la
-libEGL_la_LIBADD += $(DLOPEN_LIBS) $(LIBDRM_LIBS)
+libEGL_la_LIBADD += $(DLOPEN_LIBS) $(LIBDRM_LIBS) $(DRI_LIB_DEPS)
 endif

 include $(top_srcdir)/install-lib-links.mk
diff --git a/src/gbm/Makefile.am b/src/gbm/Makefile.am
index 08369f2..3bd4c14 100644
--- a/src/gbm/Makefile.am
+++ b/src/gbm/Makefile.am
@@ -32,6 +32,7 @@ libgbm_la_LDFLAGS = \

 libgbm_la_LIBADD = \
$(top_builddir)/src/loader/libloader.la \
+   $(DRI_LIB_DEPS) \
$(DLOPEN_LIBS)

 if HAVE_EGL_PLATFORM_WAYLAND



make[2]: Entering directory 
'/home/daenzer/src/mesa-git/mesa/build-amd64/src/gbm'
  CCLD libgbm.la
../../src/loader/.libs/libloader.a(libloader_la-xmlconfig.o): In function 
`strToF':
/home/daenzer/src/mesa-git/mesa/build-amd64/src/loader/../../../src/mesa/drivers/dri/common/xmlconfig.c:273:
 undefined reference to `pow'
collect2: error: ld returned 1 exit status
Makefile:797: recipe for target 'libgbm.la' failed
make[2]: *** [libgbm.la] Error 1
make[2]: Leaving directory '/home/daenzer/src/mesa-git/mesa/build-amd64/src/gbm'

make[2]: Entering directory 
'/home/daenzer/src/mesa-git/mesa/build-amd64/src/egl/main'
  CCLD libEGL.la
../drivers/dri2/.libs/libegl_dri2.a(libloader_la-xmlconfig.o): In function 
`strToF':
/home/daenzer/src/mesa-git/mesa/build-amd64/src/loader/../../../src/mesa/drivers/dri/common/xmlconfig.c:273:
 undefined reference to `pow'
collect2: error: ld returned 1 exit status
Makefile:664: recipe for target 'libEGL.la' failed
make[2]: *** [libEGL.la] Error 1
make[2]: Leaving directory 
'/home/daenzer/src/mesa-git/mesa/build-amd64/src/egl/main'


-- 
Earthling Michel Dänzer|  http://www.amd.com
Libre software enthusiast  |Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 79382] build error: multiple definition of `loader_get_pci_id_for_fd'

2014-05-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=79382

--- Comment #3 from Jos van Wolput wol...@onsneteindhoven.nl ---
(In reply to comment #1)
 Does the following patch help ? http://patchwork.freedesktop.org/patch/26746/

Works for me too!

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 06/11] loader: Use drirc device_id parameter in complement to DRI_PRIME

2014-05-29 Thread Axel Davy

On 29/05/2014 04:58, Michel Dänzer wrote :

On 29.05.2014 12:22, Axel Davy wrote:

DRI_PRIME is not very handy, because you have to launch the executable
with it set, which is not always easy to do.
By using drirc, the user specifies the target executable
and the device to use. After that the program will be launched everytime
on the target device.

v2: fix compilation issue

Still doesn't build here (on top of
http://lists.freedesktop.org/archives/mesa-dev/2014-May/060312.html
to fix today's build failure), see below.



../drivers/dri2/.libs/libegl_dri2.a(libloader_la-xmlconfig.o): In function 
`strToF':
/home/daenzer/src/mesa-git/mesa/build-amd64/src/loader/../../../src/mesa/drivers/dri/common/xmlconfig.c:273:
 undefined reference to `pow'
collect2: error: ld returned 1 exit status
Makefile:664: recipe for target 'libEGL.la' failed
make[2]: *** [libEGL.la] Error 1
make[2]: Leaving directory 
'/home/daenzer/src/mesa-git/mesa/build-amd64/src/egl/main'



Does adding '-lm' to libloader_la_LIBADD fix it?

Axel Davy
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] gallium/docs: improve documentation of render condition wrt blits.

2014-05-29 Thread Brian Paul

On 05/28/2014 05:23 PM, srol...@vmware.com wrote:

From: Roland Scheidegger srol...@vmware.com

---
  src/gallium/docs/source/context.rst | 10 +++---
  src/gallium/include/pipe/p_state.h  |  4 ++--
  2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/gallium/docs/source/context.rst 
b/src/gallium/docs/source/context.rst
index 571ee87..a9625f3 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -392,8 +392,10 @@ Conditional Rendering
  A drawing command can be skipped depending on the outcome of a query
  (typically an occlusion query, or streamout overflow predicate).
  The ``render_condition`` function specifies the query which should be checked
-prior to rendering anything. Functions honoring render_condition include
+prior to rendering anything. Functions always honoring render_condition include
  (and are limited to) draw_vbo, clear, clear_render_target, 
clear_depth_stencil.
+The blit function (but not resource_copy_region, which seems inconsistent)
+can also optionally honor the current render condition.

  If ``render_condition`` is called with ``query`` = NULL, conditional
  rendering is disabled and drawing takes place normally.
@@ -465,8 +467,10 @@ but overlapping blits are not permitted.
  This can be considered the equivalent of a CPU memcpy.

  ``blit`` blits a region of a resource to a region of another resource, 
including
-scaling, format conversion, and up-/downsampling, as well as
-a destination clip rectangle (scissors).
+scaling, format conversion, and up-/downsampling, as well as a destination clip
+rectangle (scissors). It can also optionally honor the current render condition
+(but either way the blit itself never contributes anything to queries currently
+gathering data).
  As opposed to manually drawing a textured quad, this lets the pipe driver 
choose
  the optimal method for blitting (like using a special 2D engine), and usually
  offers, for example, accelerated stencil-only copies even where
diff --git a/src/gallium/include/pipe/p_state.h 
b/src/gallium/include/pipe/p_state.h
index 07ce88e..7257604 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -592,8 +592,8 @@ struct pipe_blit_info
 boolean scissor_enable;
 struct pipe_scissor_state scissor;

-   boolean render_condition_enable; /** whether to leave current render
-condition enabled */
+   boolean render_condition_enable; /** whether the blit should honor the
+current render condition */
  };





For the series: Reviewed-by: Brian Paul bri...@vmware.com

Should patches 2 and 3 be tagged for the 10.1 and 10.2 branches?

-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 77449] Tracker bug for all bugs related to Steam titles

2014-05-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=77449

Bug 77449 depends on bug 68690, which changed state.

Bug 68690 Summary: Dark screen in Europa Universalis IV on Intel, R600g and 
RadeonSI
https://bugs.freedesktop.org/show_bug.cgi?id=68690

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |NOTOURBUG

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965: Add runtime checks for line antialiasing in Gen < 6.

2014-05-29 Thread Kenneth Graunke
On 05/29/2014 01:21 AM, Iago Toral wrote:
 Hi Kenneth,
 
 On Wed, 2014-05-28 at 12:42 -0700, Kenneth Graunke wrote:
 On 05/27/2014 03:50 AM, Iago Toral Quiroga wrote:
 In Gen < 6 the hardware generates a runtime bit that indicates whether AA 
 data
 has to be sent as part of the framebuffer write SEND message. This affects 
 the
 specific case where we have setup antialiased line rendering and we render
 polygons which have one face setup in GL_LINE mode (line antialiasing
 will be used) and the other one in GL_FILL mode (no line antialiasing 
 needed).

 Currently we are not doing this runtime test and instead we always send AA
 data, which produces incorrect rendering of the GL_FILL face of the polygon 
 in the aforementioned scenario (verified in ironlake and gm45).

 In Gen4 this is, likely, a regression introduced with commit 098acf6c843. In
 Gen5 this has never worked properly. Gen > 5 are not affected by this.

 The patch fixes the problem by adding the appropriate runtime check and
 adjusting the framebuffer write message accordingly in the conflictive
 scenario (detected with fs_visitor::runtime_check_aads_emit == TRUE).

 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=78679
 ---
  src/mesa/drivers/dri/i965/brw_fs.h   |  4 ++
  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 86 
 +---
  2 files changed, 58 insertions(+), 32 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
 b/src/mesa/drivers/dri/i965/brw_fs.h
 index 60a4906..ab8912f 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs.h
 +++ b/src/mesa/drivers/dri/i965/brw_fs.h
 @@ -452,6 +452,10 @@ public:
  
 void emit_color_write(int target, int index, int first_color_mrf);
 void emit_alpha_test();
 +   void do_emit_fb_write(int target, int base_mrf, int mlen, bool eot,
 + bool header_present);
 +   void emit_fb_write(int target, int base_mrf, int mlen, bool eot,
 +  bool header_present);
 void emit_fb_writes();
  
 void emit_shader_time_begin();
 diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 index 171f063..4c3897b 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 @@ -2731,6 +2731,54 @@ fs_visitor::emit_alpha_test()
  }
  
  void
 +fs_visitor::do_emit_fb_write(int target, int base_mrf, int mlen, bool eot,
 + bool header_present)
 +{
 +   fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
 +   inst-target = target;
 +   inst-base_mrf = base_mrf;
 +   inst-mlen = mlen;
 +   inst-eot = eot;
 +   inst-header_present = header_present;
 +   if ((brw-gen = 8 || brw-is_haswell)  fp-UsesKill) {
 +  inst-predicate = BRW_PREDICATE_NORMAL;
 +  inst-flag_subreg = 1;
 +   }
 +}
 +
 +void
 +fs_visitor::emit_fb_write(int target, int base_mrf, int mlen, bool eot,
 +  bool header_present)
 +{
 +   if (!runtime_check_aads_emit) {
 +  do_emit_fb_write(target, base_mrf, mlen, eot, header_present);
 +   } else {
 +  /* This can only happen in Gen  6
 +   */
 +  fs_reg reg_tmp_ud = fs_reg(this, glsl_type::uint_type);
 +  emit(AND(reg_tmp_ud,
 +   fs_reg(get_element_ud(brw_vec8_grf(1,0), 6)),

 I think

 retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD)

 might be a little clearer than:

 get_element_ud(brw_vec8_grf(1,0), 6))

 since it just refers to r1.6 right away, rather than r1.0 modified to
 have a suboffset of 6.
 
 Sure, that looks better.
 
 +   fs_reg(brw_imm_ud(126;
 +  emit(CMP(reg_null_ud,
 +   reg_tmp_ud,
 +   fs_reg(brw_imm_ud(0)),
 +   BRW_CONDITIONAL_Z));

 You can actually generate a flag condition directly from the AND
 instruction, and eliminate the CMP:

 fs_inst *inst =
emit(AND(reg_null_ud,
 fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD),
 fs_reg(0)));
 inst-conditional_mod = BRW_CONDITIONAL_Z;

 (you might have to use vec1(retype(brw_null_reg), BRW_REGISTER_TYPE_UD),
 rather than reg_null_ud.)
 
 Oh, that's much nicer! We also get rid of the temporary register with
 this.
 
 I think you messed up the parameters to the AND though. I believe this
 is what you meant:
 
 fs_inst *inst =
 emit(AND(vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)),
  fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD)),
  fs_reg(brw_imm_ud(126;
 inst-conditional_mod = BRW_CONDITIONAL_Z;

Whoops.  Yeah, that's what I meant :)

 +  emit(IF(BRW_PREDICATE_NORMAL));
 +  {
 + /* Shift message header one register since we are not sending
 +  * AA data stored in base_mrf+2
 +  */
 + do_emit_fb_write(target, base_mrf + 1, mlen - 1, eot, 
 header_present);
 +  }
 +  emit(BRW_OPCODE_ELSE);
 +  {
 + do_emit_fb_write(target, base_mrf, mlen, eot, header_present);
 +  }
 + 

Re: [Mesa-dev] [PATCH 2/2] egl-static: resolve library linking

2014-05-29 Thread Jose Fonseca
I'm happy however you guys want to address this, as long as it builds.

The only reason I added $(top_builddir)/src/loader/libloader.la was because the 
autotools build was broken for almost a week.  I don't care as much for 
autotools build breakage as scons build breakage, but I still don't like to see 
it broken.

Jose


- Original Message -
 On Wed, May 28, 2014 at 10:58 PM, Emil Velikov emil.l.veli...@gmail.com
 wrote:
  With DRM_PLATFORM libloader.la is linked into libEGL, leading
  to multiple definitions of the loader* symbols. For every other
  platform we need to explicitly link against it.
 I would prefer to see libloader.la removed from st/egl, and added to
 egl_gallium_la_LIBADD here when HAVE_EGL_PLATFORM_DRM.  The idea is
 that we don't usually do LIBADD for state trackers, but rely on
 targets to do the right thing.
 
 
  Bugzilla:
  https://urldefense.proofpoint.com/v1/url?u=https://bugs.freedesktop.org/show_bug.cgi?id%3D79263k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0Ar=NMr9uy2iTjWVixC0wOcYCWEIYhfo80qKwRgdodpoDzA%3D%0Am=3dDDaM1y2lGalSv1W9TO0xa6X%2FalOUjK0G6lPEmYiVs%3D%0As=a5e924fedf8798d6828e4c662f03b41bec73db248e91a648e3b2f6003003bd09
  Cc: José Fonseca jfons...@vmware.com
  Cc: Chia-I Wu o...@lunarg.com
  Signed-off-by: Emil Velikov emil.l.veli...@gmail.com
  ---
   src/gallium/targets/egl-static/Makefile.am | 4 +++-
   1 file changed, 3 insertions(+), 1 deletion(-)
 
  diff --git a/src/gallium/targets/egl-static/Makefile.am
  b/src/gallium/targets/egl-static/Makefile.am
  index f4990ad..224ed95 100644
  --- a/src/gallium/targets/egl-static/Makefile.am
  +++ b/src/gallium/targets/egl-static/Makefile.am
  @@ -66,7 +66,6 @@ egl_gallium_la_SOURCES = \
  egl_st.c
 
   egl_gallium_la_LIBADD = \
  -   $(top_builddir)/src/loader/libloader.la \
  $(top_builddir)/src/gallium/auxiliary/libgallium.la \
  $(top_builddir)/src/gallium/drivers/identity/libidentity.la \
  $(top_builddir)/src/gallium/drivers/trace/libtrace.la \
  @@ -100,6 +99,9 @@ AM_CPPFLAGS += $(LIBDRM_CFLAGS)
   egl_gallium_la_LIBADD += \
  $(top_builddir)/src/gbm/libgbm.la \
  $(LIBDRM_LIBS)
  +else
  +egl_gallium_la_LIBADD += \
  +   $(top_builddir)/src/loader/libloader.la
   endif
 
   if HAVE_EGL_PLATFORM_FBDEV
  --
  1.9.3
 
 
 
 
 --
 o...@lunarg.com
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 79382] build error: multiple definition of `loader_get_pci_id_for_fd'

2014-05-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=79382

--- Comment #4 from Fabio Pedretti fabio@libero.it ---
I cannot test myself, if it works for others, push it.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965/fs: Drop extra MOVs for MATH operands on Broadwell.

2014-05-29 Thread Matt Turner
These two are:

Reviewed-by: Matt Turner matts...@gmail.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Support GL_CLAMP natively on Broadwell.

2014-05-29 Thread Matt Turner
These two are

Reviewed-by: Matt Turner matts...@gmail.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 79421] New: [llvmpipe] SIGSEGV src/gallium/drivers/llvmpipe/lp_rast_priv.h:218

2014-05-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=79421

  Priority: medium
Bug ID: 79421
  Keywords: have-backtrace
  Assignee: mesa-dev@lists.freedesktop.org
   Summary: [llvmpipe] SIGSEGV
src/gallium/drivers/llvmpipe/lp_rast_priv.h:218
  Severity: normal
Classification: Unclassified
OS: Linux (All)
  Reporter: v...@freedesktop.org
  Hardware: x86-64 (AMD64)
Status: NEW
   Version: git
 Component: Other
   Product: Mesa

mesa: c0bd206a148b8fcda253359efe8c95871f89bd05 (master 10.3.0-devel)

Run piglit test fbo-drawbuffers-none glBlendFunci on llvmpipe.

$ ./bin/fbo-drawbuffers-none glBlendFunci -auto
Testing glBlendFunci.
Segmentation fault (core dumped)

(gdb) bt
#0  0x7f49681c56a7 in lp_rast_get_unswizzled_color_block_pointer
(task=0x1e27838, buf=0, x=0, y=0, layer=0)
at src/gallium/drivers/llvmpipe/lp_rast_priv.h:218
#1  0x7f49681c6328 in lp_rast_shade_tile (task=0x1e27838, arg=...) at
src/gallium/drivers/llvmpipe/lp_rast.c:316
#2  0x7f49681c6d5b in do_rasterize_bin (task=0x1e27838, bin=0x7f496b95b1e8,
x=0, y=0)
at src/gallium/drivers/llvmpipe/lp_rast.c:589
#3  0x7f49681c6dc8 in rasterize_bin (task=0x1e27838, bin=0x7f496b95b1e8,
x=0, y=0)
at src/gallium/drivers/llvmpipe/lp_rast.c:608
#4  0x7f49681c6ed8 in rasterize_scene (task=0x1e27838,
scene=0x7f496b95b010)
at src/gallium/drivers/llvmpipe/lp_rast.c:657
#5  0x7f49681c71a4 in thread_function (init_data=0x1e27838) at
src/gallium/drivers/llvmpipe/lp_rast.c:778
#6  0x7f49681c4ce9 in impl_thrd_routine (p=0x1e18c40) at
include/c11/threads_posix.h:87
#7  0x7f496716c182 in start_thread (arg=0x7f4966b0f700) at
pthread_create.c:312
#8  0x7f496afb330d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:111
(gdb) frame 0
#0  0x7f49681c56a7 in lp_rast_get_unswizzled_color_block_pointer
(task=0x1e27838, buf=0, x=0, y=0, layer=0)
at src/gallium/drivers/llvmpipe/lp_rast_priv.h:218
218   format_bytes =
util_format_get_blocksize(task-scene-fb.cbufs[buf]-format);
(gdb) print task-scene-fb.cbufs[buf]
$1 = (struct pipe_surface *) 0x0

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] gallium/docs: improve documentation of render condition wrt blits.

2014-05-29 Thread Roland Scheidegger
Am 29.05.2014 15:53, schrieb Brian Paul:
 On 05/28/2014 05:23 PM, srol...@vmware.com wrote:
 From: Roland Scheidegger srol...@vmware.com

 ---
   src/gallium/docs/source/context.rst | 10 +++---
   src/gallium/include/pipe/p_state.h  |  4 ++--
   2 files changed, 9 insertions(+), 5 deletions(-)

 diff --git a/src/gallium/docs/source/context.rst
 b/src/gallium/docs/source/context.rst
 index 571ee87..a9625f3 100644
 --- a/src/gallium/docs/source/context.rst
 +++ b/src/gallium/docs/source/context.rst
 @@ -392,8 +392,10 @@ Conditional Rendering
   A drawing command can be skipped depending on the outcome of a query
   (typically an occlusion query, or streamout overflow predicate).
   The ``render_condition`` function specifies the query which should
 be checked
 -prior to rendering anything. Functions honoring render_condition include
 +prior to rendering anything. Functions always honoring
 render_condition include
   (and are limited to) draw_vbo, clear, clear_render_target,
 clear_depth_stencil.
 +The blit function (but not resource_copy_region, which seems
 inconsistent)
 +can also optionally honor the current render condition.

   If ``render_condition`` is called with ``query`` = NULL, conditional
   rendering is disabled and drawing takes place normally.
 @@ -465,8 +467,10 @@ but overlapping blits are not permitted.
   This can be considered the equivalent of a CPU memcpy.

   ``blit`` blits a region of a resource to a region of another
 resource, including
 -scaling, format conversion, and up-/downsampling, as well as
 -a destination clip rectangle (scissors).
 +scaling, format conversion, and up-/downsampling, as well as a
 destination clip
 +rectangle (scissors). It can also optionally honor the current render
 condition
 +(but either way the blit itself never contributes anything to queries
 currently
 +gathering data).
   As opposed to manually drawing a textured quad, this lets the pipe
 driver choose
   the optimal method for blitting (like using a special 2D engine),
 and usually
   offers, for example, accelerated stencil-only copies even where
 diff --git a/src/gallium/include/pipe/p_state.h
 b/src/gallium/include/pipe/p_state.h
 index 07ce88e..7257604 100644
 --- a/src/gallium/include/pipe/p_state.h
 +++ b/src/gallium/include/pipe/p_state.h
 @@ -592,8 +592,8 @@ struct pipe_blit_info
  boolean scissor_enable;
  struct pipe_scissor_state scissor;

 -   boolean render_condition_enable; /** whether to leave current render
 -condition enabled */
 +   boolean render_condition_enable; /** whether the blit should
 honor the
 +current render condition */
   };



 
 For the series: Reviewed-by: Brian Paul bri...@vmware.com
 
 Should patches 2 and 3 be tagged for the 10.1 and 10.2 branches?
 

10.1 doesn't have the render_condition_enable bit in pipe_blit_info as
it was more recently added. I'm not convinced it's worth bothering with
this for 10.2 though I could be convinced otherwise (the only thing
breaking due to this I know of is the piglit test).

Roland
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 01/11] gallium: Add __DRIimageDriverExtension support to gallium

2014-05-29 Thread Marek Olšák
This is just copy-pasted from dri2_drawable_process_buffers. While
that might be the easiest thing to do for an outside contributor, it
really doesn't help the people who have to maintain the code. I'd like
this patch to be scratched and rewritten by either reusing
dri2_drawable_process_buffers or extracting the common code into one
or more separate functions.

Marek

On Thu, May 29, 2014 at 5:21 AM, Axel Davy axel.d...@ens.fr wrote:
 From: Keith Packard kei...@keithp.com

 Provide the hook to pull textures out of __DRIimage structures and use them as
 renderbuffers.

 Signed-off-by: Keith Packard kei...@keithp.com
 ---
 v2: Added the image-flushFrontBuffer fix.

  src/gallium/state_trackers/dri/drm/dri2.c | 238 
 +-
  1 file changed, 230 insertions(+), 8 deletions(-)

 diff --git a/src/gallium/state_trackers/dri/drm/dri2.c 
 b/src/gallium/state_trackers/dri/drm/dri2.c
 index 7dccc5e..2d93686 100644
 --- a/src/gallium/state_trackers/dri/drm/dri2.c
 +++ b/src/gallium/state_trackers/dri/drm/dri2.c
 @@ -498,6 +498,219 @@ dri2_release_buffer(__DRIscreen *sPriv, __DRIbuffer 
 *bPriv)
 FREE(buffer);
  }

 +static void
 +dri_image_allocate_textures(struct dri_context *ctx,
 +   struct dri_drawable *drawable,
 +   const enum st_attachment_type *statts,
 +   unsigned statts_count)
 +{
 +   __DRIdrawable *dPriv = drawable-dPriv;
 +   __DRIscreen *sPriv = drawable-sPriv;
 +   struct dri_screen *screen = dri_screen(sPriv);
 +   unsigned int image_format = __DRI_IMAGE_FORMAT_NONE;
 +   uint32_t buffer_mask = 0;
 +   struct __DRIimageList images;
 +   boolean alloc_depthstencil = FALSE;
 +   int i, j;
 +   struct pipe_resource templ;
 +
 +   /* See if we need a depth-stencil buffer. */
 +   for (i = 0; i  statts_count; i++) {
 +  if (statts[i] == ST_ATTACHMENT_DEPTH_STENCIL) {
 + alloc_depthstencil = TRUE;
 + break;
 +  }
 +   }
 +
 +   /* Delete the resources we won't need. */
 +   for (i = 0; i  ST_ATTACHMENT_COUNT; i++) {
 +  /* Don't delete the depth-stencil buffer, we can reuse it. */
 +  if (i == ST_ATTACHMENT_DEPTH_STENCIL  alloc_depthstencil)
 + continue;
 +
 +  pipe_resource_reference(drawable-textures[i], NULL);
 +   }
 +
 +   if (drawable-stvis.samples  1) {
 +  for (i = 0; i  ST_ATTACHMENT_COUNT; i++) {
 + boolean del = TRUE;
 +
 + /* Don't delete MSAA resources for the attachments which are 
 enabled,
 +  * we can reuse them. */
 + for (j = 0; j  statts_count; j++) {
 +if (i == statts[j]) {
 +   del = FALSE;
 +   break;
 +}
 + }
 +
 + if (del) {
 +pipe_resource_reference(drawable-msaa_textures[i], NULL);
 + }
 +  }
 +   }
 +
 +   for (i = 0; i  statts_count; i++) {
 +  enum pipe_format pf;
 +  unsigned bind;
 +
 +  dri_drawable_get_format(drawable, statts[i], pf, bind);
 +  if (pf == PIPE_FORMAT_NONE)
 + continue;
 +
 +  switch (pf) {
 +  case PIPE_FORMAT_B5G6R5_UNORM:
 + image_format = __DRI_IMAGE_FORMAT_RGB565;
 + break;
 +  case PIPE_FORMAT_B8G8R8X8_UNORM:
 + image_format = __DRI_IMAGE_FORMAT_XRGB;
 + break;
 +  case PIPE_FORMAT_B8G8R8A8_UNORM:
 + image_format = __DRI_IMAGE_FORMAT_ARGB;
 + break;
 +  case PIPE_FORMAT_R8G8B8A8_UNORM:
 + image_format = __DRI_IMAGE_FORMAT_ABGR;
 + break;
 +  default:
 + image_format = __DRI_IMAGE_FORMAT_NONE;
 + break;
 +  }
 +
 +  switch (statts[i]) {
 +  case ST_ATTACHMENT_FRONT_LEFT:
 + buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
 + break;
 +  case ST_ATTACHMENT_BACK_LEFT:
 + buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
 + break;
 +  default:
 + continue;
 +  }
 +   }
 +
 +   (*sPriv-image.loader-getBuffers) (dPriv,
 +   image_format,
 +   dPriv-dri2.stamp,
 +   dPriv-loaderPrivate,
 +   buffer_mask,
 +   images);
 +
 +   if (images.image_mask  __DRI_IMAGE_BUFFER_FRONT) {
 +  struct pipe_resource *texture = images.front-texture;
 +
 +  dPriv-w = texture-width0;
 +  dPriv-h = texture-height0;
 +
 +  pipe_resource_reference(drawable-textures[ST_ATTACHMENT_FRONT_LEFT], 
 texture);
 +   }
 +
 +   if (images.image_mask  __DRI_IMAGE_BUFFER_BACK) {
 +  struct pipe_resource *texture = images.back-texture;
 +
 +  dPriv-w = images.back-texture-width0;
 +  dPriv-h = images.back-texture-height0;
 +
 +  pipe_resource_reference(drawable-textures[ST_ATTACHMENT_BACK_LEFT], 
 texture);
 +   }
 +
 +   memset(templ, 0, sizeof(templ));
 +   templ.target = screen-target;
 +   templ.last_level = 0;
 +   templ.width0 = dPriv-w;
 +   

Re: [Mesa-dev] [PATCH V2 08/11] Gallium/dri2: implement blitImage

2014-05-29 Thread Marek Olšák
Hi Axel,

flush_resource should be called before ctx->st->flush.

Marek

On Thu, May 29, 2014 at 5:22 AM, Axel Davy axel.d...@ens.fr wrote:
 Signed-off-by: Axel Davy axel.d...@ens.fr
 ---
  src/gallium/state_trackers/dri/drm/dri2.c | 43 
 ---
  1 file changed, 40 insertions(+), 3 deletions(-)

 diff --git a/src/gallium/state_trackers/dri/drm/dri2.c 
 b/src/gallium/state_trackers/dri/drm/dri2.c
 index eefbe42..f4db3d8 100644
 --- a/src/gallium/state_trackers/dri/drm/dri2.c
 +++ b/src/gallium/state_trackers/dri/drm/dri2.c
 @@ -1251,6 +1251,42 @@ dri2_from_dma_bufs(__DRIscreen *screen,
  }

  static void
 +dri2_blit_image(__DRIcontext *context, __DRIimage *dst, __DRIimage *src,
 +int dstx0, int dsty0, int dstwidth, int dstheight,
 +int srcx0, int srcy0, int srcwidth, int srcheight)
 +{
 +   struct dri_context *ctx = dri_context(context);
 +   struct pipe_context *pipe = ctx-st-pipe;
 +   struct pipe_blit_info blit;
 +
 +   if (!dst || !src)
 +  return;
 +
 +   memset(blit, 0, sizeof(blit));
 +   blit.dst.resource = dst-texture;
 +   blit.dst.box.x = dstx0;
 +   blit.dst.box.y = dsty0;
 +   blit.dst.box.width = dstwidth;
 +   blit.dst.box.height = dstheight;
 +   blit.dst.box.depth = 1;
 +   blit.dst.format = dst-texture-format;
 +   blit.src.resource = src-texture;
 +   blit.src.box.x = srcx0;
 +   blit.src.box.y = srcy0;
 +   blit.src.box.width = srcwidth;
 +   blit.src.box.height = srcheight;
 +   blit.src.box.depth = 1;
 +   blit.src.format = src-texture-format;
 +   blit.mask = PIPE_MASK_RGBA;
 +   blit.filter = PIPE_TEX_FILTER_NEAREST;
 +
 +   pipe-blit(pipe, blit);
 +
 +   ctx-st-flush(ctx-st, 0, NULL);
 +   pipe-flush_resource(pipe, dst-texture);
 +}
 +
 +static void
  dri2_destroy_image(__DRIimage *img)
  {
 pipe_resource_reference(img-texture, NULL);
 @@ -1259,7 +1295,7 @@ dri2_destroy_image(__DRIimage *img)

  /* The extension is modified during runtime if DRI_PRIME is detected */
  static __DRIimageExtension dri2ImageExtension = {
 -.base = { __DRI_IMAGE, 6 },
 +.base = { __DRI_IMAGE, 9 },

  .createImageFromName  = dri2_create_image_from_name,
  .createImageFromRenderbuffer  = dri2_create_image_from_renderbuffer,
 @@ -1271,6 +1307,9 @@ static __DRIimageExtension dri2ImageExtension = {
  .createImageFromNames = dri2_from_names,
  .fromPlanar   = dri2_from_planar,
  .createImageFromTexture   = dri2_create_from_texture,
 +.createImageFromFds   = NULL,
 +.createImageFromDmaBufs   = NULL,
 +.blitImage= dri2_blit_image,
  };

  /*
 @@ -1325,8 +1364,6 @@ dri2_init_screen(__DRIscreen * sPriv)

if (drmGetCap(sPriv-fd, DRM_CAP_PRIME, cap) == 0 
(cap  DRM_PRIME_CAP_IMPORT)) {
 -
 - dri2ImageExtension.base.version = 8;
   dri2ImageExtension.createImageFromFds = dri2_from_fds;
   dri2ImageExtension.createImageFromDmaBufs = dri2_from_dma_bufs;
}
 --
 1.9.1

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 11/11] Radeonsi: Use dma_copy when possible for si_blit.

2014-05-29 Thread Marek Olšák
Hi Axel,

Could you please describe what GPU offloading means?

Some checks can be modified to:

info->src.box.width > 0 &&
info->src.box.height > 0 &&
... resource->nr_samples <= 1

You don't have to check if the box depth equals 1. You just need to
make sure the box depths are equal to each other, like widths and
heights.

Marek

On Thu, May 29, 2014 at 5:22 AM, Axel Davy axel.d...@ens.fr wrote:
 This improves GLX DRI3 Gpu offloading significantly on cpu
 bound benchmarks particularly.
 No performance impact for DRI2 Gpu offloading.

 v2: Add missing tests

 Signed-off-by: Axel Davy axel.d...@ens.fr
 ---
  src/gallium/drivers/radeonsi/si_blit.c | 20 
  1 file changed, 20 insertions(+)

 diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
 b/src/gallium/drivers/radeonsi/si_blit.c
 index 6bc89ab..c885a6d 100644
 --- a/src/gallium/drivers/radeonsi/si_blit.c
 +++ b/src/gallium/drivers/radeonsi/si_blit.c
 @@ -712,6 +712,26 @@ static void si_blit(struct pipe_context *ctx,
 return;
 }

 +   if (info-src.box.width ==  info-dst.box.width 
 +   info-src.box.height ==  info-dst.box.height 
 +   info-src.format == info-dst.format 
 +   info-src.box.width = 0 
 +   info-src.box.height = 0 
 +   info-src.resource-nr_samples == 0 
 +   info-dst.resource-nr_samples == 0 
 +   info-src.box.depth == 1 
 +   info-dst.box.depth == 1 
 +   info-mask == PIPE_MASK_RGBA 
 +   !info-scissor_enable 
 +   (!info-render_condition_enable ||
 +!sctx-b.current_render_cond)) {
 +   sctx-b.dma_copy(ctx, info-dst.resource, info-dst.level,
 +info-dst.box.x, info-dst.box.y,
 +info-dst.box.z, info-src.resource,
 +info-src.level, (info-src.box));
 +   return;
 +   }
 +
 assert(util_blitter_is_blit_supported(sctx-blitter, info));

 /* The driver doesn't decompress resources automatically while
 --
 1.9.1

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glx: Don't uselessly dlopen libGL within libGL

2014-05-29 Thread Adam Jackson
This is entirely pointless.  The DRI driver does not need any symbols
from libGL; even if it did, libGL would already be available to resolve
them because that's how dlopen works.

Signed-off-by: Adam Jackson a...@redhat.com
---
 src/glx/dri_common.c | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c
index e5a3f70..235d350 100644
--- a/src/glx/dri_common.c
+++ b/src/glx/dri_common.c
@@ -93,14 +93,11 @@ dri_message(int level, const char *f, ...)
 _X_HIDDEN void *
 driOpenDriver(const char *driverName)
 {
-   void *glhandle, *handle;
+   void *handle;
const char *libPaths, *p, *next;
char realDriverName[200];
int len;
 
-   /* Attempt to make sure libGL symbols will be visible to the driver */
-   glhandle = dlopen(libGL.so.1, RTLD_NOW | RTLD_LOCAL);
-
libPaths = NULL;
if (geteuid() == getuid()) {
   /* don't allow setuid apps to use LIBGL_DRIVERS_PATH */
@@ -146,9 +143,6 @@ driOpenDriver(const char *driverName)
if (!handle)
   ErrorMessageF(unable to load driver: %s_dri.so\n, driverName);
 
-   if (glhandle)
-  dlclose(glhandle);
-
return handle;
 }
 
-- 
1.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glx: Don't uselessly dlopen libGL within libGL

2014-05-29 Thread Jose Fonseca
- Original Message -
 This is entirely pointless.  The DRI driver does not need any symbols
 from libGL, even if it did 

I think DRI drivers did at one point depend on getting some glapi symbols from 
libGL.so.  I don't recall which situations the DRI driver could be loaded 
without libGL though (maybe indirect rendering inside X?)

Anyway, I agree it is not nice to dlopen(libGL).  I do recall stumbling into these 
confusing dlopens when debugging apitrace.

Jose

 libGL would already be available to resolve
 them because that's how dlopen works.
 
 Signed-off-by: Adam Jackson a...@redhat.com
 ---
  src/glx/dri_common.c | 8 +---
  1 file changed, 1 insertion(+), 7 deletions(-)
 
 diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c
 index e5a3f70..235d350 100644
 --- a/src/glx/dri_common.c
 +++ b/src/glx/dri_common.c
 @@ -93,14 +93,11 @@ dri_message(int level, const char *f, ...)
  _X_HIDDEN void *
  driOpenDriver(const char *driverName)
  {
 -   void *glhandle, *handle;
 +   void *handle;
 const char *libPaths, *p, *next;
 char realDriverName[200];
 int len;
  
 -   /* Attempt to make sure libGL symbols will be visible to the driver */
 -   glhandle = dlopen(libGL.so.1, RTLD_NOW | RTLD_LOCAL);
 -
 libPaths = NULL;
 if (geteuid() == getuid()) {
/* don't allow setuid apps to use LIBGL_DRIVERS_PATH */
 @@ -146,9 +143,6 @@ driOpenDriver(const char *driverName)
 if (!handle)
ErrorMessageF(unable to load driver: %s_dri.so\n, driverName);
  
 -   if (glhandle)
 -  dlclose(glhandle);
 -
 return handle;
  }
  
 --
 1.9.3
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 https://urldefense.proofpoint.com/v1/url?u=http://lists.freedesktop.org/mailman/listinfo/mesa-devk=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0Ar=NMr9uy2iTjWVixC0wOcYCWEIYhfo80qKwRgdodpoDzA%3D%0Am=VVUsK%2BQzrCw4fR82%2BqwgGqUu17b9yHhCTZ2d3K0ddfc%3D%0As=3be3e011473b906a7b7ef6e3f00d1c662f475caae7105be21067f4a46a199675
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] softpipe: honor the render_condition_enable bit in blits.

2014-05-29 Thread Jose Fonseca
Series is 

- Original Message -
 From: Roland Scheidegger srol...@vmware.com
 
 ---
  src/gallium/drivers/softpipe/sp_surface.c | 3 +++
  1 file changed, 3 insertions(+)
 
 diff --git a/src/gallium/drivers/softpipe/sp_surface.c
 b/src/gallium/drivers/softpipe/sp_surface.c
 index 102e8e8..768e898 100644
 --- a/src/gallium/drivers/softpipe/sp_surface.c
 +++ b/src/gallium/drivers/softpipe/sp_surface.c
 @@ -36,6 +36,9 @@ static void sp_blit(struct pipe_context *pipe,
  {
 struct softpipe_context *sp = softpipe_context(pipe);
  
 +   if (info-render_condition_enable  !softpipe_check_render_cond(sp))
 +  return;
 +
 if (info-src.resource-nr_samples  1 
 info-dst.resource-nr_samples = 1 
 !util_format_is_depth_or_stencil(info-src.resource-format) 
 --
 1.9.1
 


Reviewed-by: Jose Fonseca jfons...@vmware.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/23] megadriver/pipe-loader-to-all

2014-05-29 Thread Emil Velikov
humble ping

People interested in taking a look at the series ? Any input is appreciated
even if people are not familiar with mesa's build.

Francisco
Can you take a look at the pipe-loader patches 04/23 - 08/23 ? They add a few
functions needed to make the pipe-loader st/dri aware. Patch 22 just moves
some of the bits in opencl/Makefile.am around, so that we can use the same
static/shared pipe-driver approach after some minor work in st/clover.

Christian, Maarten
Gents can you take a look if the video targets (patches 16-18) look sane ?
This is one step towards what you guys were discussing/thinking in the
previous iteration of pipe-loader-to-all [1] [2]

If any of you guys feel like looking at the rest of the series I would be very
grateful.

Cheers,
Emil


[1] http://lists.freedesktop.org/archives/mesa-dev/2014-February/053983.html
[2] http://lists.freedesktop.org/archives/mesa-dev/2014-February/053999.html

On 25/05/14 19:42, Emil Velikov wrote:
 ping ?
 
 For people that feel that the email is a bit vague here is a summary of what
 the different terms mean and what the series means with regards to file size
 of the final libraries.
 
 
 Pipe-driver - gallium/drivers/$hw and its respective winsys built into a
 standalone loadable module. Installed as
 gallium-pipe/pipe_(r600|radeonsi|nouveau).so
 
 Pipe-loader - aux module (linked into the final library) that loads the
 appropriate pipe-driver.
 
 Static(megadriver) - identical to what you did with megaradeon. All the
 drivers selected at configure are linked into the same blob.
 Hardlink for each target for compatibility reasons.
 
 Libraries:
 dri:(r600|radeonsi|nouveau)_dri.so   - 6.5 MiB
 vdpau:  libvdpau_(r600|radeonsi|nouveau).so  - 3.5 MiB
 
 Total: 10MiB
 
 Shared(pipe-loader) - create individual pipe-drivers and standalone
 state-tracker libraries (think of them as bla_dri + libGL). The pipe-driver is
 used by all ST. Note: the interface is not stable, unlike the dri modules.
 Hardlink for each target for compatibility reasons.
 
 Libraries:
 dri:(r600|radeonsi|nouveau)_dri.so   - 3.9 MiB
 vdpau:  libvdpau_(r600|radeonsi|nouveau).so  - 633 KiB
 gallium-pipe:   pipe_(r600|radeonsi|nouveau).so  - 5.3 MiB
 
 Total: 9.8MiB
 
 Current approach - at final link time, most state-trackers pull the
 gallium/drivers/$hw, via the above mentioned DRM_DESCRIPTOR, to create an
 independent HW specific library, resulting in some duplication.
 
 dri:(r600|radeonsi|nouveau)_dri.so   - 5.0+4.5+5.3 = 14.8 MiB
 vdpau:  libvdpau_(r600|radeonsi|nouveau).so  - 1.9+1.2+2.3 = 5.4 MiB
 
 Total: 20.2MiB
 
 Note: currently egl-static and opencl are hardcoded to static and shared
 respectively. Both will be converted with the next series.
 
 
 Summary:
 Static - savings scale with number of hardware (gallium/drivers).
 Shared - savings scale with number of state-trackers (gallium/state-tracker).
 
 Cheers,
 Emil
 
 On 18/05/14 08:07, Emil Velikov wrote:
 Hi all

 Update of the megadriver/pipe-loader-to-all series.

 What & Why:
 Allow one to link every driver selected at configure with the target
 library, or even to allow them to use shared pipe-driver.

 Less duplication - rather than shipping three identical radeon winsys,
 8 copies of the dri ST, 3x the OMX ST you get the idea :)

 For those concerned that the driver will be present multiple times
 in the static library, they can opt in for shared pipe-drivers.
 Note that those have an unstable interface.

 Highlights & Notes:
  * Most gallium targets are changed with a few exceptions - egl static only,
 opencl shared, and dri static/shared for drm drivers.
  * OMX target does not build (pending Christian's feedback).
  * By default we're building static libraries. Add configure option?
  * Shared ones do not work with vdpau/gl interop.

 Next:
 Janitorial:
  * Update the release notes.
  * Cleanup configure target names.
 Megaradeon
  * As per Marek's idea, push some of the cruft to the radeon winsys layer.
 DRI
  * Create a single libdricommon (similar to classic dri).
  * Driver specific __driGetDriverExtension*.
  * Merge libdridrm + libdrisw.
  * Blown classic dri style megadriver.
 EGL/CLOVER
  * Cleanup sw winsys handling and convert egl/clover.
 Symlinks
  * Cleanup the link creation (think about other platforms) and compact/unify.


 The changes are available in the pipe-loader-to-all-or-static
 branch over at https://github.com/evelikov/Mesa/

 Build + runtime tested on my rusty old nv96 using nouveau, although I would
 appreciate additional testing :)

  * Build - automake, android and scons should just work.
  * Runtime - there should be no regressions (barring the vdpau/gl interop
 note above).

 As usual thoughts, comments and suggestions are greatly appreciated.

 Cheers
 Emil

 P.S. Did I mention that there is ~770loc savings as well :)

 

___
mesa-dev 

Re: [Mesa-dev] [PATCH] glx: Don't uselessly dlopen libGL within libGL

2014-05-29 Thread Emil Velikov
On 29/05/14 18:47, Adam Jackson wrote:
 This is entirely pointless.  The DRI driver does not need any symbols
 from libGL, even if it did libGL would already be available to resolve
 them because that's how dlopen works.
 
FWIW Reviewed-by: Emil Velikov emil.l.veli...@gmail.com

-Emil
 Signed-off-by: Adam Jackson a...@redhat.com
 ---
  src/glx/dri_common.c | 8 +---
  1 file changed, 1 insertion(+), 7 deletions(-)
 
 diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c
 index e5a3f70..235d350 100644
 --- a/src/glx/dri_common.c
 +++ b/src/glx/dri_common.c
 @@ -93,14 +93,11 @@ dri_message(int level, const char *f, ...)
  _X_HIDDEN void *
  driOpenDriver(const char *driverName)
  {
 -   void *glhandle, *handle;
 +   void *handle;
 const char *libPaths, *p, *next;
 char realDriverName[200];
 int len;
  
 -   /* Attempt to make sure libGL symbols will be visible to the driver */
 -   glhandle = dlopen(libGL.so.1, RTLD_NOW | RTLD_LOCAL);
 -
 libPaths = NULL;
 if (geteuid() == getuid()) {
/* don't allow setuid apps to use LIBGL_DRIVERS_PATH */
 @@ -146,9 +143,6 @@ driOpenDriver(const char *driverName)
 if (!handle)
ErrorMessageF(unable to load driver: %s_dri.so\n, driverName);
  
 -   if (glhandle)
 -  dlclose(glhandle);
 -
 return handle;
  }
  
 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 11/11] Radeonsi: Use dma_copy when possible for si_blit.

2014-05-29 Thread Axel Davy

On 29/05/2014 13:37, Marek Olšák wrote :

Hi Axel,

Could you please describe what GPU offloading means?

It's using a different GPU to render and to display.

Basically the gpu offloading support added by the patches is similar to 
enduro.

Some checks can be modified to:

info-src.box.width  0 
info-src.box.height  0 
... resource-nr_samples = 1
We discussed on irc about nr_samples, and it was not clear what the 
behaviour should be when it equals 1,

thus the decision to only allow 0 for this path.
I agree for the width and height.


You don't have to check if the box depth equals 1. You just need to
make sure the box depths are equal to each other, like widths and
heights.

Ok, thanks


Marek

Axel Davy
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 08/11] Gallium/dri2: implement blitImage

2014-05-29 Thread Axel Davy

On 29/05/2014 13:18, Marek Olšák wrote :

Hi Axel,

flush_resource should be called before ctx-st-flush.

Marek


Thanks, I'll update for the next version of the patch.

Axel Davy
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] egl-static: resolve library linking

2014-05-29 Thread Emil Velikov
On 29/05/14 16:45, Jose Fonseca wrote:
 I'm happy however you guys want to address this, as long as it builds.
 
 The only reason I added $(top_builddir)/src/loader/libloader.la was because 
 the autotools build was broken for almost a week.  I don't care as much for 
 autotools build breakage as scons build breakage, but I still don't like to 
 see it broken.
 
The vast number of configure flags cause us to have this lovely experience :(
Building with drm platform worked for me, so I did not see the issue until now.

I always keep an eye open for build breakage, although feel free to ping/cc me
if I've missed something :)

-Emil

 Jose
 
 
 - Original Message -
 On Wed, May 28, 2014 at 10:58 PM, Emil Velikov emil.l.veli...@gmail.com
 wrote:
 With DRM_PLATFORM libloader.la is linked into libEGL, leading
 to multiple definitions of the loader* symbols. For every other
 platform we need to explicitly link against it.
 I would prefer to see libloader.la removed from st/egl, and added to
 egl_gallium_la_LIBADD here when HAVE_EGL_PLATFORM_DRM.  The idea is
 that we don't usually do LIBADD for state trackers, but rely on
 targets to do the right thing.


 Bugzilla:
 https://urldefense.proofpoint.com/v1/url?u=https://bugs.freedesktop.org/show_bug.cgi?id%3D79263k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0Ar=NMr9uy2iTjWVixC0wOcYCWEIYhfo80qKwRgdodpoDzA%3D%0Am=3dDDaM1y2lGalSv1W9TO0xa6X%2FalOUjK0G6lPEmYiVs%3D%0As=a5e924fedf8798d6828e4c662f03b41bec73db248e91a648e3b2f6003003bd09
 Cc: José Fonseca jfons...@vmware.com
 Cc: Chia-I Wu o...@lunarg.com
 Signed-off-by: Emil Velikov emil.l.veli...@gmail.com
 ---
  src/gallium/targets/egl-static/Makefile.am | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

 diff --git a/src/gallium/targets/egl-static/Makefile.am
 b/src/gallium/targets/egl-static/Makefile.am
 index f4990ad..224ed95 100644
 --- a/src/gallium/targets/egl-static/Makefile.am
 +++ b/src/gallium/targets/egl-static/Makefile.am
 @@ -66,7 +66,6 @@ egl_gallium_la_SOURCES = \
 egl_st.c

  egl_gallium_la_LIBADD = \
 -   $(top_builddir)/src/loader/libloader.la \
 $(top_builddir)/src/gallium/auxiliary/libgallium.la \
 $(top_builddir)/src/gallium/drivers/identity/libidentity.la \
 $(top_builddir)/src/gallium/drivers/trace/libtrace.la \
 @@ -100,6 +99,9 @@ AM_CPPFLAGS += $(LIBDRM_CFLAGS)
  egl_gallium_la_LIBADD += \
 $(top_builddir)/src/gbm/libgbm.la \
 $(LIBDRM_LIBS)
 +else
 +egl_gallium_la_LIBADD += \
 +   $(top_builddir)/src/loader/libloader.la
  endif

  if HAVE_EGL_PLATFORM_FBDEV
 --
 1.9.3




 --
 o...@lunarg.com


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 79263] Linking error in egl_gallium.la when compiling 32 bit on multiarch

2014-05-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=79263

Emil Velikov emil.l.veli...@gmail.com changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #4 from Emil Velikov emil.l.veli...@gmail.com ---
Fixed with

commit febec731475a5b288cc50660b2d32780e395b950
Author: Emil Velikov emil.l.veli...@gmail.com
Date:   Wed May 28 15:43:35 2014 +0100

st/egl: do not link against libloader

Move the link to the final targets, like any other place in
mesa/gallium. This allows better visibility and will prevent
us from including the library archive twice.

Resolves multiple definition of `loader_get_pci_id_for_fd'

multiple definition of `loader_get_pci_id_for_fd'

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] docs: Document how to replace Windows built-in OpenGL software rasterizer with llvmpipe.

2014-05-29 Thread jfonseca
From: José Fonseca jfons...@vmware.com

Just happened to stumble across this registry key while debugging
something else.

This technique is much neater than trying to override opengl32.dll.

Also a few minor cleanups.
---
 docs/llvmpipe.html | 52 
 1 file changed, 40 insertions(+), 12 deletions(-)

diff --git a/docs/llvmpipe.html b/docs/llvmpipe.html
index 74f0c67..291527b 100644
--- a/docs/llvmpipe.html
+++ b/docs/llvmpipe.html
@@ -43,11 +43,7 @@ It's the fastest software rasterizer for Mesa.
/p
 /li
 li
-   pLLVM: version 2.9 recommended; 2.6 or later required./p
-   pbNOTE/b: LLVM 2.8 and earlier will not work on systems that support 
the
-   Intel AVX extensions (e.g. Sandybridge).  LLVM's code generator will
-   fail when trying to emit AVX instructions.  This was fixed in LLVM 2.9.
-   /p
+   pLLVM: version 3.4 recommended; 3.1 or later required./p
p
For Linux, on a recent Debian based distribution do:
/p
@@ -101,13 +97,15 @@ but the rest of these instructions assume that scons is 
used.
 For Windows the procedure is similar except the target:
 
 pre
-  scons build=debug libgl-gdi
+  scons platform=windows build=debug libgl-gdi
 /pre
 
 
 h1Using/h1
 
-On Linux, building will create a drop-in alternative for libGL.so into
+h2Linux/h2
+
+pOn Linux, building will create a drop-in alternative for libGL.so into/p
 
 pre
   build/foo/gallium/targets/libgl-xlib/libGL.so
@@ -117,15 +115,45 @@ or
   lib/gallium/libGL.so
 /pre
 
-To use it set the LD_LIBRARY_PATH environment variable accordingly.
+pTo use it set the LD_LIBRARY_PATH environment variable accordingly./p
+
+pFor performance evaluation pass build=release to scons, and use the 
corresponding
+lib directory without the -debug suffix./p
+
 
-For performance evaluation pass debug=no to scons, and use the corresponding
-lib directory without the -debug suffix.
+h2Windows/h2
 
-On Windows, building will create a drop-in alternative for opengl32.dll. To use
-it put it in the same directory as the application. It can also be used by
+p
+On Windows, building will create
+codebuild/windows-x86-debug/gallium/targets/libgl-gdi/opengl32.dll/code
+which is a drop-in alternative for system's codeopengl32.dll/code.  To use
+it put it in the same directory as your application.  It can also be used by
 replacing the native ICD driver, but it's quite an advanced usage, so if you
 need to ask, don't even try it.
+/p
+
+p
+There is however an easy way to replace the OpenGL software renderer that comes
+with Microsoft Windows 7 (or later) with llvmpipe (that is, on systems without
+any OpenGL drivers):
+/p
+
+ul
+  lipcopy build/windows-x86-debug/gallium/targets/libgl-gdi/opengl32.dll 
to C:\Windows\SysWOW64\mesadrv.dll/p/li
+  lipload this registry settings:/p
+  preREGEDIT4
+
+; http://technet.microsoft.com/en-us/library/cc749368.aspx
+; 
http://www.msfn.org/board/topic/143241-portable-windows-7-build-from-winpe-30/page-5#entry942596
+[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Windows 
NT\CurrentVersion\OpenGLDrivers\MSOGL]
+DLL=mesadrv.dll
+DriverVersion=dword:0001
+Flags=dword:0001
+Version=dword:0002
+/pre
+  /li
+  liDitto for 64 bits drivers if you need them./li
+/ul
 
 
 h1Profiling/h1
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 79382] build error: multiple definition of `loader_get_pci_id_for_fd'

2014-05-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=79382

Emil Velikov emil.l.veli...@gmail.com changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #5 from Emil Velikov emil.l.veli...@gmail.com ---
Pushed a slightly different patch which also fixes the issue

commit febec731475a5b288cc50660b2d32780e395b950
Author: Emil Velikov emil.l.veli...@gmail.com
Date:   Wed May 28 15:43:35 2014 +0100

st/egl: do not link against libloader

Move the link to the final targets, like any other place in
mesa/gallium. This allows better visibility and will prevent
us from including the library archive twice.

Resolves multiple definition of `loader_get_pci_id_for_fd'

multiple definition of `loader_get_pci_id_for_fd'

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] egl-static: resolve library linking

2014-05-29 Thread Jose Fonseca


- Original Message -
 On 29/05/14 16:45, Jose Fonseca wrote:
  I'm happy however you guys want to address this, as long as it builds.
  
  The only reason I added $(top_builddir)/src/loader/libloader.la was because
  the autotools build was broken for almost a week.  I don't care as much
  for autotools build breakage as scons build breakage, but I still don't
  like to see it broken.
  
 The vast number of configure flags cause us to have this lovely experience :(

Indeed.. 

We could try to convert some of the build-time variations in compile-time 
variations, so there's more overlap in what people build.  But the flip-side is 
dependencies: building everything implies satisfying all dependencies, which 
creates its own set of problems...

 Building with drm platform worked for me, so I did not see the issue until
 now.

I see.

 I always keep an eye open for build breakage, although feel free to ping/cc
 me
 if I've missed something :)

Will do. Thanks.

Jose

 
 -Emil
 
  Jose
  
  
  - Original Message -
  On Wed, May 28, 2014 at 10:58 PM, Emil Velikov emil.l.veli...@gmail.com
  wrote:
  With DRM_PLATFORM libloader.la is linked into libEGL, leading
  to multiple definitions of the loader* symbols. For every other
  platform we need to explicitly link against it.
  I would prefer to see libloader.la removed from st/egl, and added to
  egl_gallium_la_LIBADD here when HAVE_EGL_PLATFORM_DRM.  The idea is
  that we don't usually do LIBADD for state trackers, but rely on
  targets to do the right thing.
 
 
  Bugzilla:
  https://urldefense.proofpoint.com/v1/url?u=https://bugs.freedesktop.org/show_bug.cgi?id%3D79263k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0Ar=NMr9uy2iTjWVixC0wOcYCWEIYhfo80qKwRgdodpoDzA%3D%0Am=3dDDaM1y2lGalSv1W9TO0xa6X%2FalOUjK0G6lPEmYiVs%3D%0As=a5e924fedf8798d6828e4c662f03b41bec73db248e91a648e3b2f6003003bd09
  Cc: José Fonseca jfons...@vmware.com
  Cc: Chia-I Wu o...@lunarg.com
  Signed-off-by: Emil Velikov emil.l.veli...@gmail.com
  ---
   src/gallium/targets/egl-static/Makefile.am | 4 +++-
   1 file changed, 3 insertions(+), 1 deletion(-)
 
  diff --git a/src/gallium/targets/egl-static/Makefile.am
  b/src/gallium/targets/egl-static/Makefile.am
  index f4990ad..224ed95 100644
  --- a/src/gallium/targets/egl-static/Makefile.am
  +++ b/src/gallium/targets/egl-static/Makefile.am
  @@ -66,7 +66,6 @@ egl_gallium_la_SOURCES = \
  egl_st.c
 
   egl_gallium_la_LIBADD = \
  -   $(top_builddir)/src/loader/libloader.la \
  $(top_builddir)/src/gallium/auxiliary/libgallium.la \
  $(top_builddir)/src/gallium/drivers/identity/libidentity.la \
  $(top_builddir)/src/gallium/drivers/trace/libtrace.la \
  @@ -100,6 +99,9 @@ AM_CPPFLAGS += $(LIBDRM_CFLAGS)
   egl_gallium_la_LIBADD += \
  $(top_builddir)/src/gbm/libgbm.la \
  $(LIBDRM_LIBS)
  +else
  +egl_gallium_la_LIBADD += \
  +   $(top_builddir)/src/loader/libloader.la
   endif
 
   if HAVE_EGL_PLATFORM_FBDEV
  --
  1.9.3
 
 
 
 
  --
  o...@lunarg.com
 
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Add constant folding for new opcodes

2014-05-29 Thread Tobias Klausmann

Hi,
please review the following 4 patches:

1b1cfc6 nvc0/ir: Handle OP_BFIND when folding constant expressions
d2d2727 nvc0/ir: Handle OP_POPCNT when folding constant expressions
86a1ee6 nvc0/ir: Handle reverse subop for OP_EXTBF when folding constant 
expressions

84563bf nvc0/ir: clear subop when folding constant expressions

src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 39 
+--

1 file changed, 37 insertions(+), 2 deletions(-)

Thanks,
Tobias Klausmann
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] nvc0/ir: clear subop when folding constant expressions

2014-05-29 Thread Tobias Klausmann
Some operations (e.g. OP_MUL/OP_MAD/OP_EXTBF) might have a subop set.
After folding, make sure that it is cleared.

Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 1a2c2e6..58092f4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -563,6 +563,7 @@ ConstantFolding::expr(Instruction *i,
} else {
   i-op = i-saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */
}
+   i-subOp = 0;
 }
 
 void
-- 
1.8.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] nvc0/ir: Handle reverse subop for OP_EXTBF when folding constant expressions

2014-05-29 Thread Tobias Klausmann
Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 58092f4..93f7c2a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -529,8 +529,18 @@ ConstantFolding::expr(Instruction *i,
  lshift = 32 - width - offset;
   }
   switch (i-dType) {
-  case TYPE_S32: res.data.s32 = (a-data.s32  lshift)  rshift; break;
-  case TYPE_U32: res.data.u32 = (a-data.u32  lshift)  rshift; break;
+  case TYPE_S32: {
+ res.data.s32 = (res.data.s32  lshift)  rshift;
+ if (i-subOp == NV50_IR_SUBOP_EXTBF_REV)
+res.data.s32 = util_bitreverse(res.data.s32);
+ break;
+  }
+  case TYPE_U32: {
+ res.data.u32 = (res.data.u32  lshift)  rshift;
+ if (i-subOp == NV50_IR_SUBOP_EXTBF_REV)
+res.data.u32 = util_bitreverse(res.data.u32);
+ break;
+  }
   default:
  return;
   }
-- 
1.8.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] nvc0/ir: Handle OP_BFIND when folding constant expressions

2014-05-29 Thread Tobias Klausmann
Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 68b9a6d..a56756c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -556,6 +556,20 @@ ConstantFolding::expr(Instruction *i,
   }
   break;
}
+   case OP_BFIND: {
+  int shift = 0;
+  if (i-subOp == NV50_IR_SUBOP_BFIND_SAMT)
+ shift = 32 - (b-data.u32  0xff);
+  switch (i-dType) {
+  case TYPE_S32:
+ res.data.s32 = util_last_bit_signed(a-data.s32  shift)- 1; break;
+  case TYPE_U32:
+ res.data.u32 = util_last_bit(a-data.u32  shift) -1; break;
+  default:
+ return;
+  }
+  break;
+   }
default:
   return;
}
-- 
1.8.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] nvc0/ir: Handle OP_POPCNT when folding constant expressions

2014-05-29 Thread Tobias Klausmann
Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 93f7c2a..68b9a6d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -546,6 +546,16 @@ ConstantFolding::expr(Instruction *i,
   }
   break;
}
+   case OP_POPCNT: {
+  switch (i-dType) {
+  case TYPE_S32:
+  case TYPE_U32:
+ res.data.u32 = util_bitcount(a-data.u32  b-data.u32); break;
+  default:
+return;
+  }
+  break;
+   }
default:
   return;
}
-- 
1.8.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] nvc0/ir: Handle reverse subop for OP_EXTBF when folding constant expressions

2014-05-29 Thread Ilia Mirkin
Can you verify that you tested how the HW handles this, as well as
exactly how you did it (i.e. how did you modify the code + piglit
test, what the results were, etc)

On Thu, May 29, 2014 at 3:43 PM, Tobias Klausmann
tobias.johannes.klausm...@mni.thm.de wrote:
 Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
 ---
  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 14 --
  1 file changed, 12 insertions(+), 2 deletions(-)

 diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
 b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
 index 58092f4..93f7c2a 100644
 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
 +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
 @@ -529,8 +529,18 @@ ConstantFolding::expr(Instruction *i,
   lshift = 32 - width - offset;
}
switch (i-dType) {
 -  case TYPE_S32: res.data.s32 = (a-data.s32  lshift)  rshift; break;
 -  case TYPE_U32: res.data.u32 = (a-data.u32  lshift)  rshift; break;
 +  case TYPE_S32: {
 + res.data.s32 = (res.data.s32  lshift)  rshift;
 + if (i-subOp == NV50_IR_SUBOP_EXTBF_REV)
 +res.data.s32 = util_bitreverse(res.data.s32);
 + break;
 +  }
 +  case TYPE_U32: {
 + res.data.u32 = (res.data.u32  lshift)  rshift;
 + if (i-subOp == NV50_IR_SUBOP_EXTBF_REV)
 +res.data.u32 = util_bitreverse(res.data.u32);
 + break;
 +  }
default:
   return;
}
 --
 1.8.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] nvc0/ir: Handle OP_POPCNT when folding constant expressions

2014-05-29 Thread Ilia Mirkin
On Thu, May 29, 2014 at 3:43 PM, Tobias Klausmann
tobias.johannes.klausm...@mni.thm.de wrote:
 Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
 ---
  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 10 ++
  1 file changed, 10 insertions(+)

 diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
 b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
 index 93f7c2a..68b9a6d 100644
 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
 +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
 @@ -546,6 +546,16 @@ ConstantFolding::expr(Instruction *i,
}
break;
 }
 +   case OP_POPCNT: {
 +  switch (i-dType) {
 +  case TYPE_S32:
 +  case TYPE_U32:
 + res.data.u32 = util_bitcount(a-data.u32  b-data.u32); break;
 +  default:
 +return;
 +  }
 +  break;
 +   }

Why does the data type matter? I think you can handle this like AND is
handled -- just always do it.

Also, please add support for a single-argument version of POPCNT. This
will happen as a result of lowering for sm50 (maxwell), for which it's
a 1-arg instruction. Basically you need to add a case to opnd() which
checks that there's only one argument and does it accordingly. [The
existing function assumes a float op, and it'd be a pain to modify
it.]

 default:
return;
 }
 --
 1.8.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] nvc0/ir: Handle reverse subop for OP_EXTBF when folding constant expressions

2014-05-29 Thread Tobias Klausmann

Tested with:
MESA_EXTENSION_OVERRIDE=GL_ARB_gpu_shader5 ./shader_runner 
../tests/spec/arb_gpu_shader5/execution/built-in-functions/fs-bitfieldReverse.shader_test 
- green output, so this should be ok


the test was not changed though...


On 29.05.2014 21:47, Ilia Mirkin wrote:

Can you verify that you tested how the HW handles this, as well as
exactly how you did it (i.e. how did you modify the code + piglit
test, what the results were, etc)

On Thu, May 29, 2014 at 3:43 PM, Tobias Klausmann
tobias.johannes.klausm...@mni.thm.de wrote:

Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
---
  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 14 --
  1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 58092f4..93f7c2a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -529,8 +529,18 @@ ConstantFolding::expr(Instruction *i,
   lshift = 32 - width - offset;
}
switch (i-dType) {
-  case TYPE_S32: res.data.s32 = (a-data.s32  lshift)  rshift; break;
-  case TYPE_U32: res.data.u32 = (a-data.u32  lshift)  rshift; break;
+  case TYPE_S32: {
+ res.data.s32 = (res.data.s32  lshift)  rshift;
+ if (i-subOp == NV50_IR_SUBOP_EXTBF_REV)
+res.data.s32 = util_bitreverse(res.data.s32);
+ break;
+  }
+  case TYPE_U32: {
+ res.data.u32 = (res.data.u32  lshift)  rshift;
+ if (i-subOp == NV50_IR_SUBOP_EXTBF_REV)
+res.data.u32 = util_bitreverse(res.data.u32);
+ break;
+  }
default:
   return;
}
--
1.8.4.5



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] nvc0/ir: Handle OP_BFIND when folding constant expressions

2014-05-29 Thread Ilia Mirkin
How did you test this? I'm like 99% sure it's wrong. (But I'm avoiding
saying how it's wrong so that you don't just fix it to match what I
say, but instead work it out yourself by doing the tests that you
should have been doing in the first place. Or prove me wrong.)

On Thu, May 29, 2014 at 3:43 PM, Tobias Klausmann
tobias.johannes.klausm...@mni.thm.de wrote:
 Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
 ---
  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 14 ++
  1 file changed, 14 insertions(+)

 diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
 b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
 index 68b9a6d..a56756c 100644
 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
 +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
 @@ -556,6 +556,20 @@ ConstantFolding::expr(Instruction *i,
}
break;
 }
 +   case OP_BFIND: {
 +  int shift = 0;
 +  if (i-subOp == NV50_IR_SUBOP_BFIND_SAMT)
 + shift = 32 - (b-data.u32  0xff);
 +  switch (i-dType) {
 +  case TYPE_S32:
 + res.data.s32 = util_last_bit_signed(a-data.s32  shift)- 1; break;
 +  case TYPE_U32:
 + res.data.u32 = util_last_bit(a-data.u32  shift) -1; break;
 +  default:
 + return;
 +  }
 +  break;
 +   }
 default:
return;
 }
 --
 1.8.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] nvc0/ir: Handle reverse subop for OP_EXTBF when folding constant expressions

2014-05-29 Thread Ilia Mirkin
I think you misunderstood my suggestion. This code won't actually get
executed when running that test. What I was suggesting was to change
the shift/width argument passed to extbf when converting BREV, which
should in turn cause the test to start failing. The way in which you
need to modify the expected value the test generates will dictate
whether the bit reverse happens before or after the bitfield
extraction.

On Thu, May 29, 2014 at 3:53 PM, Tobias Klausmann
tobias.johannes.klausm...@mni.thm.de wrote:
 Tested with:
 MESA_EXTENSION_OVERRIDE=GL_ARB_gpu_shader5 ./shader_runner
 ../tests/spec/arb_gpu_shader5/execution/built-in-functions/fs-bitfieldReverse.shader_test
 - green output, so this should be ok

 the test was not changed though...



 On 29.05.2014 21:47, Ilia Mirkin wrote:

 Can you verify that you tested how the HW handles this, as well as
 exactly how you did it (i.e. how did you modify the code + piglit
 test, what the results were, etc)

 On Thu, May 29, 2014 at 3:43 PM, Tobias Klausmann
 tobias.johannes.klausm...@mni.thm.de wrote:

 Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
 ---
   src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 14
 --
   1 file changed, 12 insertions(+), 2 deletions(-)

 diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
 b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
 index 58092f4..93f7c2a 100644
 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
 +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
 @@ -529,8 +529,18 @@ ConstantFolding::expr(Instruction *i,
lshift = 32 - width - offset;
 }
 switch (i-dType) {
 -  case TYPE_S32: res.data.s32 = (a-data.s32  lshift)  rshift;
 break;
 -  case TYPE_U32: res.data.u32 = (a-data.u32  lshift)  rshift;
 break;
 +  case TYPE_S32: {
 + res.data.s32 = (res.data.s32  lshift)  rshift;
 + if (i-subOp == NV50_IR_SUBOP_EXTBF_REV)
 +res.data.s32 = util_bitreverse(res.data.s32);
 + break;
 +  }
 +  case TYPE_U32: {
 + res.data.u32 = (res.data.u32  lshift)  rshift;
 + if (i-subOp == NV50_IR_SUBOP_EXTBF_REV)
 +res.data.u32 = util_bitreverse(res.data.u32);
 + break;
 +  }
 default:
return;
 }
 --
 1.8.4.5


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] glsl: Eliminate unused built-in variables after compilation

2014-05-29 Thread Anuj Phogat
On Wed, May 28, 2014 at 6:35 PM, Ian Romanick i...@freedesktop.org wrote:
 From: Ian Romanick ian.d.roman...@intel.com

 After compilation (and before linking) we can eliminate quite a few
 built-in variables.  Basically, any uniform or constant (e.g.,
 gl_MaxVertexTextureImageUnits) that isn't used (with one exception) can
 be eliminated.  System values, vertex shader inputs (with one
 exception), and fragment shader outputs that are not used and not
 re-declared in the shader text can also be removed.

 gl_ModelViewProjectionMatrix and gl_Vertex are used by the built-in
 function ftransform.  There are some complications with eliminating
 these variables (see the comment in the patch), so they are not
 eliminated.

 Reduces the peak ir_variable memory usage in a trimmed apitrace of dota2
 by 3.5MB on 64-bit.

 Before: IR MEM: variable usage / name / total: 5327760 894914 6222674
 After:  IR MEM: variable usage / name / total: 2156568 318192 2474760

 Reduces the peak ir_variable memory usage in a trimmed apitrace of dota2
 by 2.8MB on 32-bit.

 Before: IR MEM: variable usage / name / total: 4118280 644100 4762380
 After:  IR MEM: variable usage / name / total: 1473408 256871 1730279

 Signed-off-by: Ian Romanick ian.d.roman...@intel.com
 Suggested-by: Eric Anholt e...@anholt.net
 Cc: Eric Anholt e...@anholt.net
 Cc: Tapani Pälli tapani.pa...@intel.com
 Cc: Eero Tamminen eero.t.tammi...@intel.com
 ---
  src/glsl/Makefile.sources   |  1 +
  src/glsl/glsl_parser_extras.cpp | 20 
  src/glsl/ir_optimization.h  |  2 +
  src/glsl/opt_dead_builtin_variables.cpp | 82 
 +
  4 files changed, 105 insertions(+)
  create mode 100644 src/glsl/opt_dead_builtin_variables.cpp

 diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
 index 6e230f7..a733323 100644
 --- a/src/glsl/Makefile.sources
 +++ b/src/glsl/Makefile.sources
 @@ -88,6 +88,7 @@ LIBGLSL_FILES = \
 $(GLSL_SRCDIR)/opt_copy_propagation.cpp \
 $(GLSL_SRCDIR)/opt_copy_propagation_elements.cpp \
 $(GLSL_SRCDIR)/opt_cse.cpp \
 +   $(GLSL_SRCDIR)/opt_dead_builtin_variables.cpp \
 $(GLSL_SRCDIR)/opt_dead_builtin_varyings.cpp \
 $(GLSL_SRCDIR)/opt_dead_code.cpp \
 $(GLSL_SRCDIR)/opt_dead_code_local.cpp \
 diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
 index d3339e7..323cb23 100644
 --- a/src/glsl/glsl_parser_extras.cpp
 +++ b/src/glsl/glsl_parser_extras.cpp
 @@ -1485,6 +1485,26 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, 
 struct gl_shader *shader,
   ;

validate_ir_tree(shader-ir);
 +
 +  enum ir_variable_mode other;
 +  switch (shader-Stage) {
 +  case MESA_SHADER_VERTEX:
 + other = ir_var_shader_in;
 + break;
 +  case MESA_SHADER_FRAGMENT:
 + other = ir_var_shader_out;
 + break;
 +  default:
 + /* Something invalide to ensure optimize_dead_builtin_uniforms
invalid?
 +  * doesn't remove anything other than uniforms or constants.
 +  */
 + other = ir_var_mode_count;
 + break;
 +  }
 +
 +  optimize_dead_builtin_variables(shader-ir, other);
 +
 +  validate_ir_tree(shader-ir);
 }

 if (shader-InfoLog)
 diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
 index c63921c..2dfd81e 100644
 --- a/src/glsl/ir_optimization.h
 +++ b/src/glsl/ir_optimization.h
 @@ -122,6 +122,8 @@ void lower_named_interface_blocks(void *mem_ctx, 
 gl_shader *shader);
  bool optimize_redundant_jumps(exec_list *instructions);
  bool optimize_split_arrays(exec_list *instructions, bool linked);
  bool lower_offset_arrays(exec_list *instructions);
 +void optimize_dead_builtin_variables(exec_list *instructions,
 + enum ir_variable_mode other);

  ir_rvalue *
  compare_index_block(exec_list *instructions, ir_variable *index,
 diff --git a/src/glsl/opt_dead_builtin_variables.cpp 
 b/src/glsl/opt_dead_builtin_variables.cpp
 new file mode 100644
 index 000..b68e720
 --- /dev/null
 +++ b/src/glsl/opt_dead_builtin_variables.cpp
 @@ -0,0 +1,82 @@
 +/*
 + * Copyright © 2014 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE 

[Mesa-dev] [RFC PATCH 00/16] Gen6 render surface state changes

2014-05-29 Thread Jordan Justen
The goal for this series was to allow layered rendering to work with
gen6. It also fixes 10 piglit tests, and a performance regression bug
(https://bugs.freedesktop.org/show_bug.cgi?id=56127).

This series is available on my gen6-layered branch in
git://people.freedesktop.org/~jljusten/mesa

I ran piglit's quick suite on gen6 and gm45:
gen6: 10 fixes, 0 regressions
gm45: no changes

The last five patches in the series are a bit hackish, but I had
trouble finding a better looking solution that also kept stencil & hiz
happy. (These patches are the RFC part :)

Jordan Justen (16):
  i965: Split gen6 renderbuffer surface state from gen5 and older
  i965/gen6: add support for layered renderbuffers
  i965/gen6: Align height to 2 with MSAA for certain surface heights
  i965: Split gen6 depth state out from brw
  i965/gen6 depth surface: calculate more specific surface type
  i965/gen6 depth surface: calculate depth (array size) for depth
surface
  i965/gen6 depth surface: calculate LOD being rendered to
  i965/gen6 depth surface: calculate minimum array element being
rendered
  i965/gen6 blorp depth: calculate base surface width/height
  i965/gen6 fbo: make unmatched depth/stencil configs return unsupported
  i965/gen6 depth surface: program 3DSTATE_DEPTH_BUFFER to top of
surface
  i965: Allow forcing a LOD0 spacing miptree layout
  i965: Support array_spacing_lod0 for multiple miplevels
  i965/gen6: Stencil/hiz needs an offset for LOD > 0
  i965/gen6: Force W tiling alignment with array_spacing_lod0
  i965/gen6: Force array_spacing_lod0 for stencil/hiz

 src/mesa/drivers/dri/i965/Makefile.sources |   2 +
 src/mesa/drivers/dri/i965/brw_context.c|   4 +
 src/mesa/drivers/dri/i965/brw_context.h|  13 +
 src/mesa/drivers/dri/i965/brw_defines.h|   2 +
 src/mesa/drivers/dri/i965/brw_misc_state.c |   4 +-
 src/mesa/drivers/dri/i965/brw_state.h  |   4 +
 src/mesa/drivers/dri/i965/brw_state_upload.c   |   2 +-
 src/mesa/drivers/dri/i965/brw_tex_layout.c |  37 ++-
 src/mesa/drivers/dri/i965/gen6_blorp.cpp   | 115 
 src/mesa/drivers/dri/i965/gen6_depth_state.c   | 368 +
 src/mesa/drivers/dri/i965/gen6_surface_state.c | 157 +++
 src/mesa/drivers/dri/i965/intel_fbo.c  |   7 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c  |  33 ++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h  |   6 +-
 src/mesa/drivers/dri/i965/intel_tex.c  |   3 +-
 src/mesa/drivers/dri/i965/intel_tex_image.c|   3 +-
 src/mesa/drivers/dri/i965/intel_tex_subimage.c |   3 +-
 src/mesa/drivers/dri/i965/intel_tex_validate.c |   3 +-
 18 files changed, 691 insertions(+), 75 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/gen6_depth_state.c
 create mode 100644 src/mesa/drivers/dri/i965/gen6_surface_state.c

-- 
2.0.0.rc4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 02/16] i965/gen6: add support for layered renderbuffers

2014-05-29 Thread Jordan Justen
Rather than pointing the surface_state directly at a single
sub-image of the texture for rendering, we now point the
surface_state at the top level of the texture, and configure
the surface_state as needed based on this.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/brw_defines.h|  2 +
 src/mesa/drivers/dri/i965/gen6_surface_state.c | 71 +-
 2 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index c38e447..d010d61 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -548,6 +548,8 @@
 /* Surface state DW4 */
 #define BRW_SURFACE_MIN_LOD_SHIFT  28
 #define BRW_SURFACE_MIN_LOD_MASK   INTEL_MASK(31, 28)
+#define BRW_SURFACE_MIN_ARRAY_ELEMENT_SHIFT17
+#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT8
 #define BRW_SURFACE_MULTISAMPLECOUNT_1  (0  4)
 #define BRW_SURFACE_MULTISAMPLECOUNT_4  (2  4)
 #define GEN7_SURFACE_MULTISAMPLECOUNT_1 (0  3)
diff --git a/src/mesa/drivers/dri/i965/gen6_surface_state.c 
b/src/mesa/drivers/dri/i965/gen6_surface_state.c
index 9fec372..728488a 100644
--- a/src/mesa/drivers/dri/i965/gen6_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_surface_state.c
@@ -26,6 +26,7 @@
 #include main/blend.h
 #include main/mtypes.h
 #include main/samplerobj.h
+#include main/texformat.h
 #include program/prog_parameter.h
 
 #include intel_mipmap_tree.h
@@ -54,30 +55,18 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_mipmap_tree *mt = irb-mt;
uint32_t *surf;
-   uint32_t tile_x, tile_y;
uint32_t format = 0;
/* _NEW_BUFFERS */
mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
+   uint32_t surftype;
+   int depth = MAX2(rb-Depth, 1);
+   int min_array_element;
+   GLenum gl_target = rb-TexImage ?
+ rb-TexImage-TexObject-Target : GL_TEXTURE_2D;
+
uint32_t surf_index =
   brw-wm.prog_data-binding_table.render_target_start + unit;
 
-   assert(!layered);
-
-   if (rb-TexImage  !brw-has_surface_tile_offset) {
-  intel_renderbuffer_get_tile_offsets(irb, tile_x, tile_y);
-
-  if (tile_x != 0 || tile_y != 0) {
-/* Original gen4 hardware couldn't draw to a non-tile-aligned
- * destination in a miptree unless you actually setup your renderbuffer
- * as a miptree and used the fragile lod/array_index/etc. controls to
- * select the image.  So, instead, we just make a new single-level
- * miptree and render into that.
- */
-intel_renderbuffer_move_to_temp(brw, irb, false);
-mt = irb-mt;
-  }
-   }
-
intel_miptree_used_for_rendering(irb-mt);
 
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
@@ -89,30 +78,42 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
 __FUNCTION__, _mesa_get_format_name(rb_format));
}
 
-   surf[0] = (BRW_SURFACE_2D  BRW_SURFACE_TYPE_SHIFT |
- format  BRW_SURFACE_FORMAT_SHIFT);
+   switch (gl_target) {
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_CUBE_MAP:
+  surftype = BRW_SURFACE_2D;
+  depth *= 6;
+  break;
+   default:
+  surftype = translate_tex_target(gl_target);
+  break;
+   }
+
+   if (layered) {
+  min_array_element = 0;
+   } else {
+  min_array_element = irb-mt_layer;
+   }
+
+   surf[0] = (surftype  BRW_SURFACE_TYPE_SHIFT |
+ format  BRW_SURFACE_FORMAT_SHIFT);
 
/* reloc */
-   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, tile_x, tile_y) +
- mt-bo-offset64);
+   surf[1] = mt-bo-offset64;
 
-   surf[2] = ((rb-Width - 1)  BRW_SURFACE_WIDTH_SHIFT |
- (rb-Height - 1)  BRW_SURFACE_HEIGHT_SHIFT);
+   surf[2] = ((irb-mt-logical_width0 - 1)  BRW_SURFACE_WIDTH_SHIFT |
+ (irb-mt-logical_height0 - 1)  BRW_SURFACE_HEIGHT_SHIFT |
+  ((irb-mt_level - irb-mt-first_level))  
BRW_SURFACE_LOD_SHIFT);
 
surf[3] = (brw_get_surface_tiling_bits(mt-tiling) |
+ depth  BRW_SURFACE_DEPTH_SHIFT |
  (mt-pitch - 1)  BRW_SURFACE_PITCH_SHIFT);
 
-   surf[4] = brw_get_surface_num_multisamples(mt-num_samples);
-
-   assert(brw-has_surface_tile_offset || (tile_x == 0  tile_y == 0));
-   /* Note that the low bits of these fields are missing, so
-* there's the possibility of getting in trouble.
-*/
-   assert(tile_x % 4 == 0);
-   assert(tile_y % 2 == 0);
-   surf[5] = ((tile_x / 4)  BRW_SURFACE_X_OFFSET_SHIFT |
- (tile_y / 2)  BRW_SURFACE_Y_OFFSET_SHIFT |
- (mt-align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
+   surf[4] = brw_get_surface_num_multisamples(mt-num_samples) |
+ min_array_element  BRW_SURFACE_MIN_ARRAY_ELEMENT_SHIFT |
+ depth  

[Mesa-dev] [RFC PATCH 05/16] i965/gen6 depth surface: calculate more specific surface type

2014-05-29 Thread Jordan Justen
(171e633 for gen6)

This will be used in 3DSTATE_DEPTH_BUFFER in a later patch.

Note: Cube maps are treated as 2D arrays with 6 times as
many array elements as the cube map array would have.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 17 ++
 src/mesa/drivers/dri/i965/gen6_depth_state.c | 33 
 2 files changed, 50 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index fe065e2..04afa24 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -792,6 +792,23 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
uint32_t draw_x = params-depth.x_offset;
uint32_t draw_y = params-depth.y_offset;
uint32_t tile_mask_x, tile_mask_y;
+   uint32_t surftype;
+   GLenum gl_target = params-depth.mt-target;
+
+   switch (gl_target) {
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_CUBE_MAP:
+  /* The PRM claims that we should use BRW_SURFACE_CUBE for this
+   * situation, but experiments show that gl_Layer doesn't work when we do
+   * this.  So we use BRW_SURFACE_2D, since for rendering purposes this is
+   * equivalent.
+   */
+  surftype = BRW_SURFACE_2D;
+  break;
+   default:
+  surftype = translate_tex_target(gl_target);
+  break;
+   }
 
brw_get_depthstencil_tile_masks(params-depth.mt,
params-depth.level,
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c 
b/src/mesa/drivers/dri/i965/gen6_depth_state.c
index 4c659c6..9066dc8 100644
--- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -30,6 +30,7 @@
 #include brw_state.h
 #include brw_defines.h
 
+#include main/mtypes.h
 #include main/fbobject.h
 #include main/glformats.h
 
@@ -131,6 +132,13 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
 uint32_t width, uint32_t height,
 uint32_t tile_x, uint32_t tile_y)
 {
+   struct gl_context *ctx = brw-ctx;
+   struct gl_framebuffer *fb = ctx-DrawBuffer;
+   uint32_t surftype;
+   GLenum gl_target = GL_TEXTURE_2D;
+   const struct intel_renderbuffer *irb = NULL;
+   const struct gl_renderbuffer *rb = NULL;
+
/* Enable the hiz bit if we're doing separate stencil, because it and the
 * separate stencil bit must have the same value. From Section 2.11.5.6.1.1
 * 3DSTATE_DEPTH_BUFFER, Bit 1.21 Separate Stencil Enable:
@@ -151,6 +159,31 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
   intel_emit_depth_stall_flushes(brw);
}
 
+   irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+   if (!irb)
+  irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
+   rb = (struct gl_renderbuffer*) irb;
+
+   if (rb) {
+  if (rb-TexImage)
+ gl_target = rb-TexImage-TexObject-Target;
+   }
+
+   switch (gl_target) {
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_CUBE_MAP:
+  /* The PRM claims that we should use BRW_SURFACE_CUBE for this
+   * situation, but experiments show that gl_Layer doesn't work when we do
+   * this.  So we use BRW_SURFACE_2D, since for rendering purposes this is
+   * equivalent.
+   */
+  surftype = BRW_SURFACE_2D;
+  break;
+   default:
+  surftype = translate_tex_target(gl_target);
+  break;
+   }
+
unsigned int len;
if (brw-gen = 6)
   len = 7;
-- 
2.0.0.rc4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 04/16] i965: Split gen6 depth state out from brw

2014-05-29 Thread Jordan Justen
We will program the gen6 depth state differently to enable layered
rendering on gen6.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/Makefile.sources   |   1 +
 src/mesa/drivers/dri/i965/brw_context.c  |   2 +-
 src/mesa/drivers/dri/i965/brw_context.h  |  13 ++
 src/mesa/drivers/dri/i965/brw_state.h|   1 +
 src/mesa/drivers/dri/i965/brw_state_upload.c |   2 +-
 src/mesa/drivers/dri/i965/gen6_depth_state.c | 273 +++
 6 files changed, 290 insertions(+), 2 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/gen6_depth_state.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 1871449..bcd7930 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -121,6 +121,7 @@ i965_FILES = \
gen6_blorp.cpp \
gen6_cc.c \
gen6_clip_state.c \
+   gen6_depth_state.c \
gen6_depthstencil.c \
gen6_gs_state.c \
 gen6_multisample_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index d35dea5..ddee815 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -642,7 +642,7 @@ brwCreateContext(gl_api api,
} else if (brw-gen = 6) {
   gen6_init_vtable_surface_functions(brw);
   gen4_init_vtable_sampler_functions(brw);
-  brw-vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
+  brw-vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
} else {
   gen4_init_vtable_surface_functions(brw);
   gen4_init_vtable_sampler_functions(brw);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 283c576..3f10d55 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1819,6 +1819,19 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
uint32_t tile_x, uint32_t tile_y);
 
 void
+gen6_emit_depthbuffer(struct brw_context *brw);
+
+void
+gen6_emit_depth_stencil_hiz(struct brw_context *brw,
+struct intel_mipmap_tree *depth_mt,
+uint32_t depth_offset, uint32_t depthbuffer_format,
+uint32_t depth_surface_type,
+struct intel_mipmap_tree *stencil_mt,
+bool hiz, bool separate_stencil,
+uint32_t width, uint32_t height,
+uint32_t tile_x, uint32_t tile_y);
+
+void
 gen7_emit_depth_stencil_hiz(struct brw_context *brw,
 struct intel_mipmap_tree *depth_mt,
 uint32_t depth_offset, uint32_t depthbuffer_format,
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 1669b77..a8051cd 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -100,6 +100,7 @@ extern const struct brw_tracked_state gen6_clip_state;
 extern const struct brw_tracked_state gen6_clip_vp;
 extern const struct brw_tracked_state gen6_color_calc_state;
 extern const struct brw_tracked_state gen6_depth_stencil_state;
+extern const struct brw_tracked_state gen6_depthbuffer;
 extern const struct brw_tracked_state gen6_gs_state;
 extern const struct brw_tracked_state gen6_gs_binding_table;
 extern const struct brw_tracked_state gen6_multisample_state;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 3a452c3..15e61d4 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -159,7 +159,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
 
gen6_binding_table_pointers,
 
-   brw_depthbuffer,
+   gen6_depthbuffer,
 
brw_polygon_stipple,
brw_polygon_stipple_offset,
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c 
b/src/mesa/drivers/dri/i965/gen6_depth_state.c
new file mode 100644
index 000..4c659c6
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

[Mesa-dev] [RFC PATCH 15/16] i965/gen6: Force W tiling alignment with array_spacing_lod0

2014-05-29 Thread Jordan Justen
Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/brw_tex_layout.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 8a0912d..9293229 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -318,9 +318,19 @@ void
 brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
 {
bool multisampled = mt-num_samples  1;
-   mt-align_w = intel_horizontal_texture_alignment_unit(brw, mt-format);
-   mt-align_h =
-  intel_vertical_texture_alignment_unit(brw, mt-format, multisampled);
+
+   if (brw-gen == 6  mt-array_spacing_lod0) {
+  /* On gen6, we use array_spacing_lod0 for stencil/hiz because the
+ hardware doesn't support multiple mip levels on stencil. Stencil uses
+ W tiling, so we force W tiling alignment when array_spacing_lod0 is
+ requested. */
+  mt-align_w = 64;
+  mt-align_h = 64;
+   } else {
+  mt-align_w = intel_horizontal_texture_alignment_unit(brw, mt-format);
+  mt-align_h =
+ intel_vertical_texture_alignment_unit(brw, mt-format, multisampled);
+   }
 
switch (mt-target) {
case GL_TEXTURE_CUBE_MAP:
-- 
2.0.0.rc4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 14/16] i965/gen6: Stencil/hiz needs an offset for LOD > 0

2014-05-29 Thread Jordan Justen
Since gen6 stencil only supports LOD0, we need to program an offset to
the LOD when emitting the stencil/hiz.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 10 -
 src/mesa/drivers/dri/i965/gen6_depth_state.c | 32 ++--
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 7be8ccd..920f435 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -872,13 +872,21 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
/* 3DSTATE_HIER_DEPTH_BUFFER */
{
   struct intel_mipmap_tree *hiz_mt = params-depth.mt-hiz_mt;
+  uint32_t offset = 0;
+
+  if (hiz_mt-array_spacing_lod0) {
+ offset = intel_miptree_get_aligned_offset(hiz_mt,
+   hiz_mt-level[lod].level_x,
+   hiz_mt-level[lod].level_y,
+   false);
+  }
 
   BEGIN_BATCH(3);
   OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER  16) | (3 - 2));
   OUT_BATCH(hiz_mt-pitch - 1);
   OUT_RELOC(hiz_mt-bo,
 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-0);
+offset);
   ADVANCE_BATCH();
}
 
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c 
b/src/mesa/drivers/dri/i965/gen6_depth_state.c
index 3ef2fab..8fcc0a8 100644
--- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -271,12 +271,21 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
   /* Emit hiz buffer. */
   if (hiz) {
  struct intel_mipmap_tree *hiz_mt = depth_mt-hiz_mt;
+ uint32_t offset = 0;
+
+ if (hiz_mt-array_spacing_lod0) {
+offset = intel_miptree_get_aligned_offset(hiz_mt,
+  
hiz_mt-level[lod].level_x,
+  
hiz_mt-level[lod].level_y,
+  false);
+ }
+
 BEGIN_BATCH(3);
 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER  16) | (3 - 2));
 OUT_BATCH(hiz_mt-pitch - 1);
 OUT_RELOC(hiz_mt-bo,
   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-  0);
+  offset);
 ADVANCE_BATCH();
   } else {
 BEGIN_BATCH(3);
@@ -288,6 +297,25 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
 
   /* Emit stencil buffer. */
   if (separate_stencil) {
+ uint32_t offset = 0;
+
+ if (stencil_mt-array_spacing_lod0) {
+if (stencil_mt-format == MESA_FORMAT_S_UINT8) {
+   /* Note: we can't compute the stencil offset using
+* intel_region_get_aligned_offset(), because stencil_region 
claims
+* that the region is untiled even though it's W tiled.
+*/
+   offset =
+  stencil_mt-level[lod].level_y * stencil_mt-pitch +
+  stencil_mt-level[lod].level_x * 64;
+} else {
+   offset = intel_miptree_get_aligned_offset(stencil_mt,
+ 
stencil_mt-level[lod].level_x,
+ 
stencil_mt-level[lod].level_y,
+ false);
+}
+ }
+
 BEGIN_BATCH(3);
 OUT_BATCH((_3DSTATE_STENCIL_BUFFER  16) | (3 - 2));
  /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
@@ -298,7 +326,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
 OUT_BATCH(2 * stencil_mt-pitch - 1);
 OUT_RELOC(stencil_mt-bo,
   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-  0);
+  offset);
 ADVANCE_BATCH();
   } else {
 BEGIN_BATCH(3);
-- 
2.0.0.rc4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 09/16] i965/gen6 blorp depth: calculate base surface width/height

2014-05-29 Thread Jordan Justen
(e3a49e1 for gen6)

This will be used in 3DSTATE_DEPTH_BUFFER in a later patch.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 5a55676..f14ea90 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -792,6 +792,7 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
uint32_t draw_x = params-depth.x_offset;
uint32_t draw_y = params-depth.y_offset;
uint32_t tile_mask_x, tile_mask_y;
+   uint32_t surfwidth, surfheight;
uint32_t surftype;
unsigned int depth = MAX2(params-depth.mt-logical_depth0, 1);
unsigned int min_array_element;
@@ -824,6 +825,18 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
 
lod = params-depth.level - params-depth.mt-first_level;
 
+   if (params-hiz_op != GEN6_HIZ_OP_NONE  lod == 0) {
+  /* HIZ ops for lod 0 may set the width  height a little
+   * larger to allow the fast depth clear to fit the hardware
+   * alignment requirements. (8x4)
+   */
+  surfwidth = params-depth.width;
+  surfheight = params-depth.height;
+   } else {
+  surfwidth = params-depth.mt-logical_width0;
+  surfheight = params-depth.mt-logical_height0;
+   }
+
/* 3DSTATE_DEPTH_BUFFER */
{
   uint32_t tile_x = draw_x  tile_mask_x;
-- 
2.0.0.rc4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 10/16] i965/gen6 fbo: make unmatched depth/stencil configs return unsupported

2014-05-29 Thread Jordan Justen
(f3c886b for gen6)

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/intel_fbo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
b/src/mesa/drivers/dri/i965/intel_fbo.c
index e43e18b..22f707f 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -673,8 +673,8 @@ intel_validate_framebuffer(struct gl_context *ctx, struct 
gl_framebuffer *fb)
}
 
if (depth_mt  stencil_mt) {
-  if (brw-gen = 7) {
- /* For gen = 7, we are using the lod/minimum-array-element fields
+  if (brw-gen = 6) {
+ /* For gen = 6, we are using the lod/minimum-array-element fields
   * and supportting layered rendering. This means that we must restrict
   * the depth  stencil attachments to match in various more retrictive
   * ways. (width, height, depth, LOD and layer)
-- 
2.0.0.rc4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 11/16] i965/gen6 depth surface: program 3DSTATE_DEPTH_BUFFER to top of surface

2014-05-29 Thread Jordan Justen
(bf25ee2 for gen6)

Previously we would always find the 2D sub-surface of interest,
and then program the surface to this location. Now we always
program the 3DSTATE_DEPTH_BUFFER at the start of the surface.
To select the lod/slice, we utilize the lod & minimum array
element fields.

We also must disable brw_workaround_depthstencil_alignment for
gen >= 6. Now the hardware will handle alignment when rendering
to additional slices/LODs.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56127
---
 src/mesa/drivers/dri/i965/brw_misc_state.c   |  4 +-
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 71 +---
 src/mesa/drivers/dri/i965/gen6_depth_state.c | 32 ++---
 3 files changed, 49 insertions(+), 58 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c 
b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 76e22bd..e3980fc 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -261,10 +261,10 @@ brw_workaround_depthstencil_alignment(struct brw_context 
*brw,
if (stencil_irb)
   brw-depthstencil.stencil_mt = get_stencil_miptree(stencil_irb);
 
-   /* Gen7+ doesn't require the workarounds, since we always program the
+   /* Gen6+ doesn't require the workarounds, since we always program the
 * surface state at the start of the whole surface.
 */
-   if (brw-gen = 7)
+   if (brw-gen = 6)
   return;
 
/* Check if depth buffer is in depth/stencil format.  If so, then it's only
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index f14ea90..7be8ccd 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -788,10 +788,6 @@ static void
 gen6_blorp_emit_depth_stencil_config(struct brw_context *brw,
  const brw_blorp_params *params)
 {
-   struct gl_context *ctx = brw-ctx;
-   uint32_t draw_x = params-depth.x_offset;
-   uint32_t draw_y = params-depth.y_offset;
-   uint32_t tile_mask_x, tile_mask_y;
uint32_t surfwidth, surfheight;
uint32_t surftype;
unsigned int depth = MAX2(params-depth.mt-logical_depth0, 1);
@@ -815,12 +811,6 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
   break;
}
 
-   brw_get_depthstencil_tile_masks(params-depth.mt,
-   params-depth.level,
-   params-depth.layer,
-   NULL,
-   tile_mask_x, tile_mask_y);
-
min_array_element = params-depth.layer;
 
lod = params-depth.level - params-depth.mt-first_level;
@@ -839,55 +829,42 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
 
/* 3DSTATE_DEPTH_BUFFER */
{
-  uint32_t tile_x = draw_x  tile_mask_x;
-  uint32_t tile_y = draw_y  tile_mask_y;
-  uint32_t offset =
- intel_miptree_get_aligned_offset(params-depth.mt,
-  draw_x  ~tile_mask_x,
-  draw_y  ~tile_mask_y, false);
-
-  /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
-   * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for Depth
-   * Coordinate Offset X/Y:
-   *
-   *   The 3 LSBs of both offsets must be zero to ensure correct
-   *   alignment
-   *
-   * We have no guarantee that tile_x and tile_y are correctly aligned,
-   * since they are determined by the mipmap layout, which is only aligned
-   * to multiples of 4.
-   *
-   * So, to avoid hanging the GPU, just smash the low order 3 bits of
-   * tile_x and tile_y to 0.  This is a temporary workaround until we come
-   * up with a better solution.
-   */
-  WARN_ONCE((tile_x  7) || (tile_y  7),
-Depth/stencil buffer needs alignment to 8-pixel boundaries.\n
-Truncating offset, bad rendering may occur.\n);
-  tile_x = ~7;
-  tile_y = ~7;
-
   intel_emit_post_sync_nonzero_flush(brw);
   intel_emit_depth_stall_flushes(brw);
 
   BEGIN_BATCH(7);
+  /* 3DSTATE_DEPTH_BUFFER dw0 */
   OUT_BATCH(_3DSTATE_DEPTH_BUFFER  16 | (7 - 2));
+
+  /* 3DSTATE_DEPTH_BUFFER dw1 */
   OUT_BATCH((params-depth.mt-pitch - 1) |
 params-depth_format  18 |
 1  21 | /* separate stencil enable */
 1  22 | /* hiz enable */
 BRW_TILEWALK_YMAJOR  26 |
 1  27 | /* y-tiled */
-BRW_SURFACE_2D  29);
+surftype  29);
+
+  /* 3DSTATE_DEPTH_BUFFER dw2 */
   OUT_RELOC(params-depth.mt-bo,
 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-offset);
+0);
+
+  /* 3DSTATE_DEPTH_BUFFER dw3 */
   OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW  1 |
-

[Mesa-dev] [RFC PATCH 03/16] i965/gen6: Align height to 2 with MSAA for certain surface heights

2014-05-29 Thread Jordan Justen
TODO: Find a good documented reason for this change.

This could be related to PRM Volume 1 Part 1: Graphics Core, Section
7.18.3.7.1 (Surface Arrays For all surfaces other than separate
stencil buffer):

 "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
  value calculated in the equation above, for every other odd Surface
  Height starting from 1 i.e. 1,5,9,13"

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/gen6_surface_state.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_surface_state.c 
b/src/mesa/drivers/dri/i965/gen6_surface_state.c
index 728488a..715111f 100644
--- a/src/mesa/drivers/dri/i965/gen6_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_surface_state.c
@@ -101,8 +101,12 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
/* reloc */
surf[1] = mt-bo-offset64;
 
+   int height0 = irb-mt-logical_height0;
+   if (brw-gen == 6  irb-mt-num_samples  1  (height0 % 4) == 1)
+  height0 = ALIGN(height0, 2);
+
surf[2] = ((irb-mt-logical_width0 - 1)  BRW_SURFACE_WIDTH_SHIFT |
- (irb-mt-logical_height0 - 1)  BRW_SURFACE_HEIGHT_SHIFT |
+ (height0 - 1)  BRW_SURFACE_HEIGHT_SHIFT |
   ((irb-mt_level - irb-mt-first_level))  
BRW_SURFACE_LOD_SHIFT);
 
surf[3] = (brw_get_surface_tiling_bits(mt-tiling) |
-- 
2.0.0.rc4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 16/16] i965/gen6: Force array_spacing_lod0 for stencil/hiz

2014-05-29 Thread Jordan Justen
gen6 only supports LOD0 for stencil, forcing us to use an
array_spacing_lod0 type of miptree layout.

We use a combination of array_spacing_lod0 and a regular miptree in
that all slices of a particular LOD are combined without spacing for
additional LODs. This allows the surface to be set up like the
hardware expects, and also tries to minimize the overall texture size.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 2d4224c..aff00b0 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -367,6 +367,7 @@ intel_miptree_create_layout(struct brw_context *brw,
_mesa_get_format_base_format(format) == GL_DEPTH_STENCIL 
(brw-must_use_separate_stencil ||
(brw-has_separate_stencil  brw_is_hiz_depth_format(brw, format {
+  bool separate_lods = brw-gen == 6;
   mt-stencil_mt = intel_miptree_create(brw,
 mt-target,
 MESA_FORMAT_S_UINT8,
@@ -378,7 +379,7 @@ intel_miptree_create_layout(struct brw_context *brw,
 true,
 num_samples,
 INTEL_MIPTREE_TILING_ANY,
-false);
+separate_lods);
   if (!mt-stencil_mt) {
 intel_miptree_release(mt);
 return NULL;
@@ -1393,6 +1394,7 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
struct intel_mipmap_tree *mt)
 {
assert(mt-hiz_mt == NULL);
+   bool separate_lods = brw-gen == 6;
mt-hiz_mt = intel_miptree_create(brw,
  mt-target,
  mt-format,
@@ -1404,7 +1406,7 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
  true,
  mt-num_samples,
  INTEL_MIPTREE_TILING_ANY,
- false);
+ separate_lods);
 
if (!mt-hiz_mt)
   return false;
-- 
2.0.0.rc4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 01/16] i965: Split gen6 renderbuffer surface state from gen5 and older

2014-05-29 Thread Jordan Justen
We will program the gen6 renderbuffer surface state differently to
enable layered rendering on gen6.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/Makefile.sources |   1 +
 src/mesa/drivers/dri/i965/brw_context.c|   4 +
 src/mesa/drivers/dri/i965/brw_state.h  |   3 +
 src/mesa/drivers/dri/i965/gen6_surface_state.c | 152 +
 4 files changed, 160 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/gen6_surface_state.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 2570059..1871449 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -129,6 +129,7 @@ i965_FILES = \
gen6_scissor_state.c \
gen6_sf_state.c \
 gen6_sol.c \
+   gen6_surface_state.c \
gen6_urb.c \
gen6_viewport_state.c \
gen6_vs_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 39dd582..d35dea5 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -639,6 +639,10 @@ brwCreateContext(gl_api api,
   gen7_init_vtable_surface_functions(brw);
   gen7_init_vtable_sampler_functions(brw);
   brw-vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
+   } else if (brw-gen = 6) {
+  gen6_init_vtable_surface_functions(brw);
+  gen4_init_vtable_sampler_functions(brw);
+  brw-vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
} else {
   gen4_init_vtable_surface_functions(brw);
   gen4_init_vtable_sampler_functions(brw);
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index dbcf7c7..1669b77 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -261,6 +261,9 @@ calculate_attr_overrides(const struct brw_context *brw,
  uint32_t *flat_enables,
  uint32_t *urb_entry_read_length);
 
+/* gen6_surface_state.c */
+void gen6_init_vtable_surface_functions(struct brw_context *brw);
+
 /* brw_vs_surface_state.c */
 void
 brw_upload_vec4_pull_constants(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/gen6_surface_state.c 
b/src/mesa/drivers/dri/i965/gen6_surface_state.c
new file mode 100644
index 000..9fec372
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_surface_state.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#include main/context.h
+#include main/blend.h
+#include main/mtypes.h
+#include main/samplerobj.h
+#include program/prog_parameter.h
+
+#include intel_mipmap_tree.h
+#include intel_batchbuffer.h
+#include intel_tex.h
+#include intel_fbo.h
+#include intel_buffer_objects.h
+
+#include brw_context.h
+#include brw_state.h
+#include brw_defines.h
+#include brw_wm.h
+
+/**
+ * Sets up a surface state structure to point at the given region.
+ * While it is only used for the front/back buffer currently, it should be
+ * usable for further buffers when doing ARB_draw_buffer support.
+ */
+static void
+gen6_update_renderbuffer_surface(struct brw_context *brw,
+ struct gl_renderbuffer *rb,
+ bool layered,
+ unsigned int unit)
+{
+   struct gl_context *ctx = brw-ctx;
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   struct intel_mipmap_tree *mt = irb-mt;
+   uint32_t *surf;
+   uint32_t tile_x, tile_y;
+   uint32_t format = 0;
+   /* _NEW_BUFFERS */
+   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
+   uint32_t surf_index =
+  brw-wm.prog_data-binding_table.render_target_start + unit;
+
+   assert(!layered);
+
+   if (rb-TexImage  

[Mesa-dev] [RFC PATCH 13/16] i965: Support array_spacing_lod0 for multiple miplevels

2014-05-29 Thread Jordan Justen
Previously array spacing lod0 was only used with a single mip level.

It indicated that no mip level spacing should be used between array
slices.

gen6 stencil only supports LOD0, so we need to allocate the miptree
similarly to array spacing lod0, except we also need space for more
than just one mip level.

So, the miptree is allocated with tightly packed array slice spacing,
but we still also pack the miplevels into the region similar to a
normal multi mip level packing.

Essentially, the miptree is set up as if the texture slices formed
one tall 2D texture, rather than a 2D array.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/brw_tex_layout.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 76044b2..8a0912d 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -203,6 +203,11 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
   if (mt-compressed)
 img_height /= mt-align_h;
 
+  if (mt-array_spacing_lod0) {
+ /* Compact arrays with separated miplevels */
+img_height *= depth;
+  }
+
   /* Because the images are packed better, the final offset
* might not be the maximal one:
*/
@@ -238,6 +243,7 @@ brw_miptree_layout_texture_array(struct brw_context *brw,
 struct intel_mipmap_tree *mt)
 {
int h0, h1;
+   unsigned height = mt-physical_height0;
 
h0 = ALIGN(mt-physical_height0, mt-align_h);
h1 = ALIGN(minify(mt-physical_height0, 1), mt-align_h);
@@ -251,11 +257,22 @@ brw_miptree_layout_texture_array(struct brw_context *brw,
brw_miptree_layout_2d(mt);
 
for (unsigned level = mt-first_level; level = mt-last_level; level++) {
+  unsigned img_height;
+  img_height = ALIGN(height, mt-align_h);
+  if (mt-compressed)
+ img_height /= mt-align_h;
+
   for (int q = 0; q  mt-physical_depth0; q++) {
-intel_miptree_set_image_offset(mt, level, q, 0, q * physical_qpitch);
+ if (mt-array_spacing_lod0) {
+intel_miptree_set_image_offset(mt, level, q, 0, q * img_height);
+ } else {
+intel_miptree_set_image_offset(mt, level, q, 0, q * 
physical_qpitch);
+ }
   }
+  height = minify(height, 1);
}
-   mt-total_height = physical_qpitch * mt-physical_depth0;
+   if (!mt-array_spacing_lod0)
+  mt-total_height = physical_qpitch * mt-physical_depth0;
 
align_cube(mt);
 }
-- 
2.0.0.rc4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 06/16] i965/gen6 depth surface: calculate depth (array size) for depth surface

2014-05-29 Thread Jordan Justen
(bc1acaa for gen6)

This will be used in 3DSTATE_DEPTH_BUFFER in a later patch.

Note: Cube maps are treated as 2D arrays with 6 times as
many array elements as the cube map array would have.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 2 ++
 src/mesa/drivers/dri/i965/gen6_depth_state.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 04afa24..d434134 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -793,6 +793,7 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
uint32_t draw_y = params-depth.y_offset;
uint32_t tile_mask_x, tile_mask_y;
uint32_t surftype;
+   unsigned int depth = MAX2(params-depth.mt-logical_depth0, 1);
GLenum gl_target = params-depth.mt-target;
 
switch (gl_target) {
@@ -804,6 +805,7 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
* equivalent.
*/
   surftype = BRW_SURFACE_2D;
+  depth *= 6;
   break;
default:
   surftype = translate_tex_target(gl_target);
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c 
b/src/mesa/drivers/dri/i965/gen6_depth_state.c
index 9066dc8..d10eb14 100644
--- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -135,6 +135,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
struct gl_context *ctx = brw-ctx;
struct gl_framebuffer *fb = ctx-DrawBuffer;
uint32_t surftype;
+   unsigned int depth = 1;
GLenum gl_target = GL_TEXTURE_2D;
const struct intel_renderbuffer *irb = NULL;
const struct gl_renderbuffer *rb = NULL;
@@ -165,6 +166,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
rb = (struct gl_renderbuffer*) irb;
 
if (rb) {
+  depth = MAX2(rb-Depth, 1);
   if (rb-TexImage)
  gl_target = rb-TexImage-TexObject-Target;
}
@@ -178,6 +180,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
* equivalent.
*/
   surftype = BRW_SURFACE_2D;
+  depth *= 6;
   break;
default:
   surftype = translate_tex_target(gl_target);
-- 
2.0.0.rc4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 07/16] i965/gen6 depth surface: calculate LOD being rendered to

2014-05-29 Thread Jordan Justen
(08ef1dd for gen6)

This will be used in 3DSTATE_DEPTH_BUFFER in a later patch.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 3 +++
 src/mesa/drivers/dri/i965/gen6_depth_state.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index d434134..3f6b929 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -795,6 +795,7 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
uint32_t surftype;
unsigned int depth = MAX2(params-depth.mt-logical_depth0, 1);
GLenum gl_target = params-depth.mt-target;
+   unsigned int lod;
 
switch (gl_target) {
case GL_TEXTURE_CUBE_MAP_ARRAY:
@@ -818,6 +819,8 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
NULL,
tile_mask_x, tile_mask_y);
 
+   lod = params-depth.level - params-depth.mt-first_level;
+
/* 3DSTATE_DEPTH_BUFFER */
{
   uint32_t tile_x = draw_x  tile_mask_x;
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c 
b/src/mesa/drivers/dri/i965/gen6_depth_state.c
index d10eb14..0fd8882 100644
--- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -137,6 +137,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
uint32_t surftype;
unsigned int depth = 1;
GLenum gl_target = GL_TEXTURE_2D;
+   unsigned int lod;
const struct intel_renderbuffer *irb = NULL;
const struct gl_renderbuffer *rb = NULL;
 
@@ -187,6 +188,8 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
   break;
}
 
+   lod = irb ? irb-mt_level - irb-mt-first_level : 0;
+
unsigned int len;
if (brw-gen = 6)
   len = 7;
-- 
2.0.0.rc4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 08/16] i965/gen6 depth surface: calculate minimum array element being rendered

2014-05-29 Thread Jordan Justen
(a23cfb8 for gen6)

In layered rendering this will be 0. Otherwise it will be the
selected slice.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp |  3 +++
 src/mesa/drivers/dri/i965/gen6_depth_state.c | 10 ++
 2 files changed, 13 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 3f6b929..5a55676 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -794,6 +794,7 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
uint32_t tile_mask_x, tile_mask_y;
uint32_t surftype;
unsigned int depth = MAX2(params-depth.mt-logical_depth0, 1);
+   unsigned int min_array_element;
GLenum gl_target = params-depth.mt-target;
unsigned int lod;
 
@@ -819,6 +820,8 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
NULL,
tile_mask_x, tile_mask_y);
 
+   min_array_element = params-depth.layer;
+
lod = params-depth.level - params-depth.mt-first_level;
 
/* 3DSTATE_DEPTH_BUFFER */
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c 
b/src/mesa/drivers/dri/i965/gen6_depth_state.c
index 0fd8882..cacc6ee 100644
--- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -136,6 +136,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
struct gl_framebuffer *fb = ctx-DrawBuffer;
uint32_t surftype;
unsigned int depth = 1;
+   unsigned int min_array_element;
GLenum gl_target = GL_TEXTURE_2D;
unsigned int lod;
const struct intel_renderbuffer *irb = NULL;
@@ -188,6 +189,15 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
   break;
}
 
+   if (fb-MaxNumLayers  0 || !irb) {
+  min_array_element = 0;
+   } else if (irb-mt-num_samples  1) {
+  /* Convert physical layer to logical layer. */
+  min_array_element = irb-mt_layer / irb-mt-num_samples;
+   } else {
+  min_array_element = irb-mt_layer;
+   }
+
lod = irb ? irb-mt_level - irb-mt-first_level : 0;
 
unsigned int len;
-- 
2.0.0.rc4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 12/16] i965: Allow forcing a LOD0 spacing miptree layout

2014-05-29 Thread Jordan Justen
gen6 does not support multiple miplevels with stencil. Therefore we
need to lay out its miptree with no LOD0 spacing between the slices of
each miplevel.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 src/mesa/drivers/dri/i965/intel_fbo.c  |  3 ++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 31 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h  |  6 +++--
 src/mesa/drivers/dri/i965/intel_tex.c  |  3 ++-
 src/mesa/drivers/dri/i965/intel_tex_image.c|  3 ++-
 src/mesa/drivers/dri/i965/intel_tex_subimage.c |  3 ++-
 src/mesa/drivers/dri/i965/intel_tex_validate.c |  3 ++-
 7 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
b/src/mesa/drivers/dri/i965/intel_fbo.c
index 22f707f..87abbf6 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -980,7 +980,8 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
  width, height, depth,
  true,
  irb-mt-num_samples,
- INTEL_MIPTREE_TILING_ANY);
+ INTEL_MIPTREE_TILING_ANY,
+ false);
 
if (brw_is_hiz_depth_format(brw, new_mt-format)) {
   intel_miptree_alloc_hiz(brw, new_mt);
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b7d86a3..2d4224c 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -242,7 +242,8 @@ intel_miptree_create_layout(struct brw_context *brw,
 GLuint height0,
 GLuint depth0,
 bool for_bo,
-GLuint num_samples)
+GLuint num_samples,
+bool force_array_spacing)
 {
struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
if (!mt)
@@ -376,7 +377,8 @@ intel_miptree_create_layout(struct brw_context *brw,
 mt-logical_depth0,
 true,
 num_samples,
-INTEL_MIPTREE_TILING_ANY);
+INTEL_MIPTREE_TILING_ANY,
+false);
   if (!mt-stencil_mt) {
 intel_miptree_release(mt);
 return NULL;
@@ -394,6 +396,9 @@ intel_miptree_create_layout(struct brw_context *brw,
   }
}
 
+   if (force_array_spacing)
+  mt-array_spacing_lod0 = true;
+
brw_miptree_layout(brw, mt);
 
return mt;
@@ -548,7 +553,8 @@ intel_miptree_create(struct brw_context *brw,
 GLuint depth0,
 bool expect_accelerated_upload,
  GLuint num_samples,
- enum intel_miptree_tiling_mode requested_tiling)
+ enum intel_miptree_tiling_mode requested_tiling,
+ bool force_array_spacing)
 {
struct intel_mipmap_tree *mt;
mesa_format tex_format = format;
@@ -562,7 +568,8 @@ intel_miptree_create(struct brw_context *brw,
mt = intel_miptree_create_layout(brw, target, format,
  first_level, last_level, width0,
  height0, depth0,
- false, num_samples);
+false, num_samples,
+force_array_spacing);
/*
 * pitch == 0 || height == 0  indicates the null texture
 */
@@ -673,7 +680,7 @@ intel_miptree_create_for_bo(struct brw_context *brw,
mt = intel_miptree_create_layout(brw, GL_TEXTURE_2D, format,
 0, 0,
 width, height, 1,
-true, 0 /* num_samples */);
+true, 0, false);
if (!mt) {
   free(mt);
   return mt;
@@ -782,7 +789,7 @@ intel_miptree_create_for_renderbuffer(struct brw_context 
*brw,
 
mt = intel_miptree_create(brw, target, format, 0, 0,
 width, height, depth, true, num_samples,
- INTEL_MIPTREE_TILING_ANY);
+ INTEL_MIPTREE_TILING_ANY, false);
if (!mt)
   goto fail;
 
@@ -1283,7 +1290,8 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
  mt-logical_depth0,
  true,
  0 /* num_samples */,
- INTEL_MIPTREE_TILING_Y);
+ INTEL_MIPTREE_TILING_Y,
+ false);
 
/* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
 

[Mesa-dev] [PATCH 3/3] svga: use svga_shader_too_large() in compile_vs()

2014-05-29 Thread Brian Paul
And rework the dummy shader code to match the fragment shader case.
---
 src/gallium/drivers/svga/svga_state_vs.c |   44 --
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_state_vs.c 
b/src/gallium/drivers/svga/svga_state_vs.c
index 64dd7c9..125903b 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -100,6 +100,29 @@ get_dummy_vertex_shader(void)
 
 
 /**
+ * Replace the given shader's instruction with a simple / dummy shader.
+ * We use this when normal shader translation fails.
+ */
+static struct svga_shader_variant *
+get_compiled_dummy_vertex_shader(struct svga_vertex_shader *vs,
+ const struct svga_vs_compile_key *key)
+{
+   const struct tgsi_token *dummy = get_dummy_vertex_shader();
+   struct svga_shader_variant *variant;
+
+   if (!dummy) {
+  return NULL;
+   }
+
+   FREE((void *) vs-base.tokens);
+   vs-base.tokens = dummy;
+
+   variant = svga_translate_vertex_program(vs, key);
+   return variant;
+}
+
+
+/**
  * Translate TGSI shader into an svga shader variant.
  */
 static enum pipe_error
@@ -114,16 +137,21 @@ compile_vs(struct svga_context *svga,
variant = svga_translate_vertex_program( vs, key );
if (variant == NULL) {
   /* some problem during translation, try the dummy shader */
-  const struct tgsi_token *dummy = get_dummy_vertex_shader();
-  if (!dummy) {
- ret = PIPE_ERROR_OUT_OF_MEMORY;
+  variant = get_compiled_dummy_vertex_shader(vs, key);
+  if (!variant) {
+ ret = PIPE_ERROR;
  goto fail;
   }
-  debug_printf(Failed to compile vertex shader, using dummy shader 
instead.\n);
-  FREE((void *) vs-base.tokens);
-  vs-base.tokens = dummy;
-  variant = svga_translate_vertex_program(vs, key);
-  if (variant == NULL) {
+   }
+
+   if (svga_shader_too_large(svga, variant)) {
+  /* too big, use dummy shader */
+  debug_printf(Shader too large (%lu bytes),
+using dummy shader instead.\n,
+   (unsigned long ) variant-nr_tokens
+   * sizeof(variant-tokens[0]));
+  variant = get_compiled_dummy_vertex_shader(vs, key);
+  if (!variant) {
  ret = PIPE_ERROR;
  goto fail;
   }
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] svga: added svga_shader_too_large() helper

2014-05-29 Thread Brian Paul
To check if a shader's bytecode exceeds the device limit.  There's no
limit when using GBS.
---
 src/gallium/drivers/svga/svga_shader.h |   21 +
 1 file changed, 21 insertions(+)

diff --git a/src/gallium/drivers/svga/svga_shader.h 
b/src/gallium/drivers/svga/svga_shader.h
index 348dc0c..fd500ae 100644
--- a/src/gallium/drivers/svga/svga_shader.h
+++ b/src/gallium/drivers/svga/svga_shader.h
@@ -41,4 +41,25 @@ svga_destroy_shader_variant(struct svga_context *svga,
 struct svga_shader_variant *variant);
 
 
+/**
+ * Check if a shader's bytecode exceeds the device limits.
+ */
+static INLINE boolean
+svga_shader_too_large(const struct svga_context *svga,
+  const struct svga_shader_variant *variant)
+{
+   if (svga_have_gb_objects(svga)) {
+  return FALSE;
+   }
+
+   if (variant-nr_tokens * sizeof(variant-tokens[0])
+   + sizeof(SVGA3dCmdDefineShader) + sizeof(SVGA3dCmdHeader)
+SVGA_CB_MAX_COMMAND_SIZE) {
+  return FALSE;
+   }
+
+   return TRUE;
+}
+
+
 #endif /* SVGA_SHADER_H */
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] svga: use svga_shader_too_large() in compile_fs()

2014-05-29 Thread Brian Paul
---
 src/gallium/drivers/svga/svga_state_fs.c |4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_state_fs.c 
b/src/gallium/drivers/svga/svga_state_fs.c
index 7931528..8f419fa 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -157,9 +157,7 @@ compile_fs(struct svga_context *svga,
   }
}
 
-   if (variant-nr_tokens * sizeof(variant-tokens[0])
-   + sizeof(SVGA3dCmdDefineShader) + sizeof(SVGA3dCmdHeader)
-   = SVGA_CB_MAX_COMMAND_SIZE) {
+   if (svga_shader_too_large(svga, variant)) {
   /* too big, use dummy shader */
   debug_printf(Shader too large (%lu bytes),
 using dummy shader instead.\n,
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] i965: Don't emit SURFACE_STATEs for gather workarounds on Broadwell.

2014-05-29 Thread Chris Forbes
Reviewed-by: Chris Forbes chr...@ijw.co.nz

On Thu, May 29, 2014 at 7:32 PM, Chris Forbes chr...@ijw.co.nz wrote:
 Did this test run include forcing ARB_gpu_shader5 on?

 On Thu, May 29, 2014 at 7:06 PM, Kenneth Graunke kenn...@whitecape.org 
 wrote:
 As far as I can tell, Broadwell doesn't need any of the SURFACE_STATE
 workarounds for textureGather() bugs, so there's no need to emit
 a second set of identical copies.

 To keep things simple, just point the gather surface index base to the
 same place as the texture surface index base.

 Signed-off-by: Kenneth Graunke kenn...@whitecape.org
 Cc: 10.2 mesa-sta...@lists.freedesktop.org
 ---
  src/mesa/drivers/dri/i965/brw_shader.cpp |  9 +++--
  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 14 --
  2 files changed, 15 insertions(+), 8 deletions(-)

 No Piglit regressions on Broadwell with -t texture.

 diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
 b/src/mesa/drivers/dri/i965/brw_shader.cpp
 index f4f1334..7fae416 100644
 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
 @@ -740,8 +740,13 @@ 
 backend_visitor::assign_common_binding_table_offsets(uint32_t 
 next_binding_table
 }

 if (prog-UsesGather) {
 -  stage_prog_data-binding_table.gather_texture_start = 
 next_binding_table_offset;
 -  next_binding_table_offset += num_textures;
 +  if (brw-gen = 8) {
 + stage_prog_data-binding_table.gather_texture_start =
 +stage_prog_data-binding_table.texture_start;
 +  } else {
 + stage_prog_data-binding_table.gather_texture_start = 
 next_binding_table_offset;
 + next_binding_table_offset += num_textures;
 +  }
 } else {
stage_prog_data-binding_table.gather_texture_start = 0xd0d0d0d0;
 }
 diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
 b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
 index c9d9614..3279d3b 100644
 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
 +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
 @@ -829,12 +829,14 @@ brw_update_texture_surfaces(struct brw_context *brw)
 /* emit alternate set of surface state for gather. this
  * allows the surface format to be overriden for only the
  * gather4 messages. */
 -   if (vs  vs-UsesGather)
 -  update_stage_texture_surfaces(brw, vs, brw-vs.base, true);
 -   if (gs  gs-UsesGather)
 -  update_stage_texture_surfaces(brw, gs, brw-gs.base, true);
 -   if (fs  fs-UsesGather)
 -  update_stage_texture_surfaces(brw, fs, brw-wm.base, true);
 +   if (brw-gen  8) {
 +  if (vs  vs-UsesGather)
 + update_stage_texture_surfaces(brw, vs, brw-vs.base, true);
 +  if (gs  gs-UsesGather)
 + update_stage_texture_surfaces(brw, gs, brw-gs.base, true);
 +  if (fs  fs-UsesGather)
 + update_stage_texture_surfaces(brw, fs, brw-wm.base, true);
 +   }

 brw-state.dirty.brw |= BRW_NEW_SURFACES;
  }
 --
 1.9.1

 ___
 mesa-stable mailing list
 mesa-sta...@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-stable
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] glsl: Eliminate unused built-in variables after compilation

2014-05-29 Thread Matt Turner
On Wed, May 28, 2014 at 6:35 PM, Ian Romanick i...@freedesktop.org wrote:
 From: Ian Romanick ian.d.roman...@intel.com

 After compilation (and before linking) we can eliminate quite a few
 built-in variables.  Basically, any uniform or constant (e.g.,
 gl_MaxVertexTextureImageUnits) that isn't used (with one exception) can
 be eliminated.  System values, vertex shader inputs (with one
 exception), and fragment shader outputs that are not used and not
 re-declared in the shader text can also be removed.

 gl_ModelViewProjectMatrix and gl_Vertex are used by the built-in
 function ftransform.  There are some complications with eliminating
 these variables (see the comment in the patch), so they are not
 eliminated.

 Reduces the peak ir_variable memory usage in a trimmed apitrace of dota2
 by 3.5MB on 64-bit.

What was the total memory usage of ir_variable? It's been hard for me
to make sense of reductions without percentages.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] glsl: Eliminate unused built-in variables after compilation

2014-05-29 Thread Ian Romanick
On 05/29/2014 02:20 PM, Matt Turner wrote:
 On Wed, May 28, 2014 at 6:35 PM, Ian Romanick i...@freedesktop.org wrote:
 From: Ian Romanick ian.d.roman...@intel.com

 After compilation (and before linking) we can eliminate quite a few
 built-in variables.  Basically, any uniform or constant (e.g.,
 gl_MaxVertexTextureImageUnits) that isn't used (with one exception) can
 be eliminated.  System values, vertex shader inputs (with one
 exception), and fragment shader outputs that are not used and not
 re-declared in the shader text can also be removed.

 gl_ModelViewProjectMatrix and gl_Vertex are used by the built-in
 function ftransform.  There are some complications with eliminating
 these variables (see the comment in the patch), so they are not
 eliminated.

 Reduces the peak ir_variable memory usage in a trimmed apitrace of dota2
 by 3.5MB on 64-bit.
 
 What was the total memory usage of ir_variable? It's been hard for me
 to make sense of reductions without percentages.

Before: IR MEM: variable usage / name / total: 4118280 644100 4762380
After:  IR MEM: variable usage / name / total: 1473408 256871 1730279

Before the change, the ir_memory_usage visitor added up 4762380 bytes of
memory allocated for ir_variable, ir_variable::name, and
ir_variable::state_slots.  After the change it added up 1730279 bytes.
So... 63.7% reduction?

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] glsl: Eliminate unused built-in variables after compilation

2014-05-29 Thread Matt Turner
On Thu, May 29, 2014 at 2:26 PM, Ian Romanick i...@freedesktop.org wrote:
 On 05/29/2014 02:20 PM, Matt Turner wrote:
 On Wed, May 28, 2014 at 6:35 PM, Ian Romanick i...@freedesktop.org wrote:
 From: Ian Romanick ian.d.roman...@intel.com

 After compilation (and before linking) we can eliminate quite a few
 built-in variables.  Basically, any uniform or constant (e.g.,
 gl_MaxVertexTextureImageUnits) that isn't used (with one exception) can
 be eliminated.  System values, vertex shader inputs (with one
 exception), and fragment shader outputs that are not used and not
 re-declared in the shader text can also be removed.

 gl_ModelViewProjectMatrix and gl_Vertex are used by the built-in
 function ftransform.  There are some complications with eliminating
 these variables (see the comment in the patch), so they are not
 eliminated.

 Reduces the peak ir_variable memory usage in a trimmed apitrace of dota2
 by 3.5MB on 64-bit.

 What was the total memory usage of ir_variable? It's been hard for me
 to make sense of reductions without percentages.

 Before: IR MEM: variable usage / name / total: 4118280 644100 4762380
 After:  IR MEM: variable usage / name / total: 1473408 256871 1730279

 Before the change, the ir_memory_usage visitor added up 4762380 bytes of
 memory allocated for ir_variable, ir_variable::name, and
 ir_variable::state_slots.  After the change it added up 1730279 bytes.
 So... 63.7% reduction?


Oh, duh. Thanks.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: Eliminate unused built-in variables after compilation

2014-05-29 Thread Ian Romanick
From: Ian Romanick ian.d.roman...@intel.com

After compilation (and before linking) we can eliminate quite a few
built-in variables.  Basically, any uniform or constant (e.g.,
gl_MaxVertexTextureImageUnits) that isn't used (with one exception) can
be eliminated.  System values, vertex shader inputs (with one
exception), and fragment shader outputs that are not used and not
re-declared in the shader text can also be removed.

gl_ModelViewProjectionMatrix and gl_Vertex are used by the built-in
function ftransform.  There are some complications with eliminating
these variables (see the comment in the patch), so they are not
eliminated.

Reduces the peak ir_variable memory usage in a trimmed apitrace of dota2
by 3.5MB on 64-bit.

Before: IR MEM: variable usage / name / total: 5327760 894914 6222674
After:  IR MEM: variable usage / name / total: 2156568 318192 2474760

Reduces the peak ir_variable memory usage in a trimmed apitrace of dota2
by 2.8MB on 32-bit.

Before: IR MEM: variable usage / name / total: 4118280 644100 4762380
After:  IR MEM: variable usage / name / total: 1473408 256871 1730279

v2: Don't remove any built-in with Transpose in the name.

Signed-off-by: Ian Romanick ian.d.roman...@intel.com
Suggested-by: Eric Anholt e...@anholt.net
Cc: Eric Anholt e...@anholt.net
Cc: Tapani Pälli tapani.pa...@intel.com
Cc: Eero Tamminen eero.t.tammi...@intel.com
---
After my last edit, I re-ran piglit, re-ran the apitrace, and did 'git
commit --amend' instead of 'git commit --amend -a'. :(

 src/glsl/Makefile.sources   |  1 +
 src/glsl/glsl_parser_extras.cpp | 20 
 src/glsl/ir_optimization.h  |  2 +
 src/glsl/opt_dead_builtin_variables.cpp | 83 +
 4 files changed, 106 insertions(+)
 create mode 100644 src/glsl/opt_dead_builtin_variables.cpp

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 6e230f7..a733323 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -88,6 +88,7 @@ LIBGLSL_FILES = \
$(GLSL_SRCDIR)/opt_copy_propagation.cpp \
$(GLSL_SRCDIR)/opt_copy_propagation_elements.cpp \
$(GLSL_SRCDIR)/opt_cse.cpp \
+   $(GLSL_SRCDIR)/opt_dead_builtin_variables.cpp \
$(GLSL_SRCDIR)/opt_dead_builtin_varyings.cpp \
$(GLSL_SRCDIR)/opt_dead_code.cpp \
$(GLSL_SRCDIR)/opt_dead_code_local.cpp \
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index d3339e7..323cb23 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -1485,6 +1485,26 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct 
gl_shader *shader,
  ;
 
   validate_ir_tree(shader-ir);
+
+  enum ir_variable_mode other;
+  switch (shader-Stage) {
+  case MESA_SHADER_VERTEX:
+ other = ir_var_shader_in;
+ break;
+  case MESA_SHADER_FRAGMENT:
+ other = ir_var_shader_out;
+ break;
+  default:
+ /* Something invalid to ensure optimize_dead_builtin_variables
+  * doesn't remove anything other than uniforms or constants.
+  */
+ other = ir_var_mode_count;
+ break;
+  }
+
+  optimize_dead_builtin_variables(shader-ir, other);
+
+  validate_ir_tree(shader-ir);
}
 
if (shader-InfoLog)
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index c63921c..2dfd81e 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -122,6 +122,8 @@ void lower_named_interface_blocks(void *mem_ctx, gl_shader 
*shader);
 bool optimize_redundant_jumps(exec_list *instructions);
 bool optimize_split_arrays(exec_list *instructions, bool linked);
 bool lower_offset_arrays(exec_list *instructions);
+void optimize_dead_builtin_variables(exec_list *instructions,
+ enum ir_variable_mode other);
 
 ir_rvalue *
 compare_index_block(exec_list *instructions, ir_variable *index,
diff --git a/src/glsl/opt_dead_builtin_variables.cpp 
b/src/glsl/opt_dead_builtin_variables.cpp
new file mode 100644
index 000..afe22f2
--- /dev/null
+++ b/src/glsl/opt_dead_builtin_variables.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE 

Re: [Mesa-dev] [PATCH] glx: Don't uselessly dlopen libGL within libGL

2014-05-29 Thread Ian Romanick
On 05/29/2014 10:47 AM, Adam Jackson wrote:
 This is entirely pointless.  The DRI driver does not need any symbols
 from libGL, even if it did libGL would already be available to resolve
 them because that's how dlopen works.

That code was originally added by:

commit 061a3fe34051327fba418cc99599ecff0016ee15
Author: Michel Dänzer mic...@daenzer.net
Date:   Mon Aug 14 15:53:37 2006 +

Bug #7169: Attempt to make libGL symbols visible to drivers.

Some applications end up dlopening libGL without RTLD_GLOBAL, so the libGL
symbols referenced by the driver can't be resolved when libGL dlopens it.
This attempts to make the libGL symbols visible to the driver by dlopening
libGL (again) with RTLD_GLOBAL before dlopening the driver and dlclosing
the obtained handle afterwards.

So... I think that's not how dlopen always works.  For other reasons, I
don't think the DRI drivers directly access symbols from libGL, so this
may be safe anyway.  I'd want to see it tested, though.

 Signed-off-by: Adam Jackson a...@redhat.com
 ---
  src/glx/dri_common.c | 8 +---
  1 file changed, 1 insertion(+), 7 deletions(-)
 
 diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c
 index e5a3f70..235d350 100644
 --- a/src/glx/dri_common.c
 +++ b/src/glx/dri_common.c
 @@ -93,14 +93,11 @@ dri_message(int level, const char *f, ...)
  _X_HIDDEN void *
  driOpenDriver(const char *driverName)
  {
 -   void *glhandle, *handle;
 +   void *handle;
 const char *libPaths, *p, *next;
 char realDriverName[200];
 int len;
  
 -   /* Attempt to make sure libGL symbols will be visible to the driver */
 -   glhandle = dlopen(libGL.so.1, RTLD_NOW | RTLD_LOCAL);
 -
 libPaths = NULL;
 if (geteuid() == getuid()) {
/* don't allow setuid apps to use LIBGL_DRIVERS_PATH */
 @@ -146,9 +143,6 @@ driOpenDriver(const char *driverName)
 if (!handle)
ErrorMessageF(unable to load driver: %s_dri.so\n, driverName);
  
 -   if (glhandle)
 -  dlclose(glhandle);
 -
 return handle;
  }
  

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 79421] [llvmpipe] SIGSEGV src/gallium/drivers/llvmpipe/lp_rast_priv.h:218

2014-05-29 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=79421

--- Comment #1 from Roland Scheidegger srol...@vmware.com ---
I can't reproduce this, and valgrind didn't show anything suspicious either.
Is this a regression?
I see the problem though, we're missing a test for non-zero cbuf in
lp_rast_shade_tile (which we already do in lp_rast_shade_quads_mask).
For some reason though when I run the test it never touches lp_rast_shade_tile
just lp_rast_shade_quads_mask which certainly explains why it doesn't crash -
from a quick glance with the geometry I get lp_rast_shade_tile should be
impossible. In any case I increased the FB_SIZE in the piglit test which made
it crash indeed. I'm going to post a fix...

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] llvmpipe: fix crash when not all attachments are populated in a fb

2014-05-29 Thread sroland
From: Roland Scheidegger srol...@vmware.com

Framebuffers have been able to have NULL attachments for a while. llvmpipe
handled that properly for lp_rast_shade_quads_mask but it seems the change
didn't make it to lp_rast_shade_tile.
This fixes piglit fbo-drawbuffers-none test (though I need to increase
the FB_SIZE from 32 to 256 so the tris cover some tiles fully).
https://bugs.freedesktop.org/show_bug.cgi?id=79421

Cc: 10.1 10.2 mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/llvmpipe/lp_rast.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c 
b/src/gallium/drivers/llvmpipe/lp_rast.c
index d50ee48..f7c5b03 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -312,9 +312,15 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
 
  /* color buffer */
  for (i = 0; i  scene-fb.nr_cbufs; i++){
-stride[i] = scene-cbufs[i].stride;
-color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, 
tile_x + x,
-  tile_y + y, 
inputs-layer);
+if (scene-fb.cbufs[i]) {
+   stride[i] = scene-cbufs[i].stride;
+   color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, 
tile_x + x,
+ tile_y + 
y, inputs-layer);
+}
+else {
+   stride[i] = 0;
+   color[i] = NULL;
+}
  }
 
  /* depth buffer */
-- 
1.9.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] llvmpipe: (trivial) drop unswizzled from some function names

2014-05-29 Thread sroland
From: Roland Scheidegger srol...@vmware.com

This made sense when swizzled storage layout was used for rendering to tiles.
But nowadays the name just adds confusion (and makes for long lines).
---
 src/gallium/drivers/llvmpipe/lp_rast.c  | 16 +--
 src/gallium/drivers/llvmpipe/lp_rast_priv.h | 42 +++--
 2 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c 
b/src/gallium/drivers/llvmpipe/lp_rast.c
index f7c5b03..a3420a2 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -185,7 +185,7 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
 
if (scene-fb.zsbuf) {
   unsigned layer;
-  uint8_t *dst_layer = lp_rast_get_unswizzled_depth_tile_pointer(task, 
LP_TEX_USAGE_READ_WRITE);
+  uint8_t *dst_layer = lp_rast_get_depth_tile_pointer(task, 
LP_TEX_USAGE_READ_WRITE);
   block_size = util_format_get_blocksize(scene-fb.zsbuf-format);
 
   clear_value = clear_mask;
@@ -314,8 +314,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
  for (i = 0; i  scene-fb.nr_cbufs; i++){
 if (scene-fb.cbufs[i]) {
stride[i] = scene-cbufs[i].stride;
-   color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, 
tile_x + x,
- tile_y + 
y, inputs-layer);
+   color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x,
+  tile_y + y, 
inputs-layer);
 }
 else {
stride[i] = 0;
@@ -325,8 +325,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
 
  /* depth buffer */
  if (scene-zsbuf.map) {
-depth = lp_rast_get_unswizzled_depth_block_pointer(task, tile_x + 
x,
-   tile_y + y, 
inputs-layer);
+depth = lp_rast_get_depth_block_pointer(task, tile_x + x,
+tile_y + y, inputs-layer);
 depth_stride = scene-zsbuf.stride;
  }
 
@@ -409,8 +409,8 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
for (i = 0; i  scene-fb.nr_cbufs; i++) {
   if (scene-fb.cbufs[i]) {
  stride[i] = scene-cbufs[i].stride;
- color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y,
-   inputs-layer);
+ color[i] = lp_rast_get_color_block_pointer(task, i, x, y,
+inputs-layer);
   }
   else {
  stride[i] = 0;
@@ -421,7 +421,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
/* depth buffer */
if (scene-zsbuf.map) {
   depth_stride = scene-zsbuf.stride;
-  depth = lp_rast_get_unswizzled_depth_block_pointer(task, x, y, 
inputs-layer);
+  depth = lp_rast_get_depth_block_pointer(task, x, y, inputs-layer);
}
 
assert(lp_check_alignment(state-jit_context.u8_blend_color, 16));
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h 
b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index 063a70e..d92230d 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -143,11 +143,11 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
 
 
 /**
- * Get pointer to the unswizzled color tile
+ * Get pointer to the color tile
  */
 static INLINE uint8_t *
-lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task,
-  unsigned buf, enum lp_texture_usage 
usage)
+lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
+   unsigned buf, enum lp_texture_usage usage)
 {
const struct lp_scene *scene = task-scene;
unsigned format_bytes;
@@ -163,7 +163,8 @@ lp_rast_get_unswizzled_color_tile_pointer(struct 
lp_rasterizer_task *task,
   assert(cbuf);
 
   format_bytes = util_format_get_blocksize(cbuf-format);
-  task-color_tiles[buf] = scene-cbufs[buf].map + 
scene-cbufs[buf].stride * task-y + format_bytes * task-x;
+  task-color_tiles[buf] = scene-cbufs[buf].map + 
scene-cbufs[buf].stride * task-y +
+   format_bytes * task-x;
}
 
return task-color_tiles[buf];
@@ -171,11 +172,11 @@ lp_rast_get_unswizzled_color_tile_pointer(struct 
lp_rasterizer_task *task,
 
 
 /**
- * Get pointer to the unswizzled depth tile
+ * Get pointer to the depth tile
  */
 static INLINE uint8_t *
-lp_rast_get_unswizzled_depth_tile_pointer(struct lp_rasterizer_task *task,
-  enum lp_texture_usage usage)
+lp_rast_get_depth_tile_pointer(struct lp_rasterizer_task *task,
+   enum lp_texture_usage usage)
 {
const struct lp_scene *scene = task-scene;
unsigned 

Re: [Mesa-dev] [PATCH V2 11/11] Radeonsi: Use dma_copy when possible for si_blit.

2014-05-29 Thread Marek Olšák
On Thu, May 29, 2014 at 8:15 PM, Axel Davy axel.d...@ens.fr wrote:
 On 29/05/2014 13:37, Marek Olšák wrote :

 Hi Axel,

 Could you please describe what GPU offloading means?

 It's using a different GPU to render and to display.

 Basically the gpu offloading support added by the patches is similar to
 enduro.

 Some checks can be modified to:

 info->src.box.width > 0 &&
 info->src.box.height > 0 &&
 ... resource->nr_samples <= 1

 We discussed on irc about nr_samples, and it was not clear what the
 behaviour should be when it equals 1,
 thus the decision to only allow 0 for this path.
 I agree for the width and height.

I guess those people on IRC were just guessing. I can assure you that
0 and 1 both mean 1 sample. All Mesa code uses nr_samples <= 1
and nr_samples > 1 expressions when checking for MSAA resources.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa IR as a list of instructions

2014-05-29 Thread Marek Olšák
I have just some comments below.

On Wed, May 28, 2014 at 8:37 PM, Eric Anholt e...@anholt.net wrote:
 Here's a series I started back in January as a little experiment.
 Basically, I feel guilty for pushing GLSL IR into the driver, and wish I'd
 just fixed up Mesa IR back in the day.  But, given that we're still
 feeding Mesa IR into drivers as well (ARB programs and fixed function
 vertex programs), it made me think: What if I fixed it up now, and got
 Mesa IR to the point that we could just garbage collect the GLSL IR input
 paths to drivers?

 Mesa IR has a bunch of weaknesses that need to get sorted out if it's
 going to be useful:

 - It's a single giant array of instructions, making modifications of the
   instruction stream (instruction lowering, optimization, etc.) more
   expensive in code and CPU time than it should be.

A giant array of instructions is great if you don't expect people to
change it, which is why TGSI is an array too (it's actually one
continuous buffer).

 - It doesn't have any variable declarations, so if you have dynamic array
   indexing, optimization just shuts down (plus, no annotation on the
   temps, so debugging is irritating).

TGSI supports arrays for the temporary file and that makes it more
usable than Mesa IR.

 - It doesn't have integer instructions or anything else post-GLSL-1.30.

One thing that may be surprising about TGSI (and this may be needed
for Mesa IR too) is that it has 2 IF opcodes. The first one, "IF",
expects a floating-point condition, which is true if condition !=
0.0f. The second one, "UIF" (unsigned integer if), is the typical "if"
from C. Since registers are untyped, you'll need typed instructions.

 - The optimization passes for it are totally ad-hoc and fairly weak.
 - It's not SSA.

 I'm interested in fixing all of these.  How do people feel about this
 goal?

I don't really have an opinion, because everything is TGSI to me. This
will probably have very little impact on Gallium drivers except maybe
those which don't have an optimizing shader backend.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 06/11] loader: Use drirc device_id parameter in complement to DRI_PRIME

2014-05-29 Thread Axel Davy

On 29/05/2014 04:58, Michel Dänzer wrote :

Still doesn't build here (on top of
http://lists.freedesktop.org/archives/mesa-dev/2014-May/060312.html
to fix today's build failure), see below. This fixes it, but I'm not
sure it's the proper fix:


Could you tell me what are your configure flags?

I tried the patches on top of febec731475a5b288cc50660b2d32780e395b950
and I didn't have any compiling issues, even when building gallium-egl.

Axel Davy
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 06/11] loader: Use drirc device_id parameter in complement to DRI_PRIME

2014-05-29 Thread Michel Dänzer
On 30.05.2014 12:02, Axel Davy wrote:
 On 29/05/2014 04:58, Michel Dänzer wrote :
 Still doesn't build here (on top of
 http://lists.freedesktop.org/archives/mesa-dev/2014-May/060312.html
 to fix today's build failure), see below. This fixes it, but I'm not
 sure it's the proper fix:

 Could you tell me what are your configure flags?
 
 I tried the patches on top of febec731475a5b288cc50660b2d32780e395b950

I can still reproduce it on top of that.


 and I didn't have any compiling issues, even when building gallium-egl.

Here you go:

'../configure'  '--enable-dri3' '--enable-debug' '--with-llvm-shared-libs' 
'--with-llvm-prefix=/home/daenzer/src/llvm-git/llvm/build-amd64/Release+Debug+Asserts'
 '--with-dri-searchpath=/usr/local/lib/x86_64-linux-gnu/dri' 
'--enable-texture-float' '--enable-glx-tls' '--enable-gallium-egl' 
'--enable-gallium-g3dvl' '--enable-gles1' '--enable-gles2' '--enable-opencl' 
'--enable-opencl-icd' '--enable-openvg' '--with-egl-platforms=x11,drm,wayland' 
'--enable-shared-glapi' '--with-gallium-drivers=r300,r600,radeonsi,swrast' 
'--enable-r600-llvm-compiler' '--with-dri-drivers=' 
'USER_CFLAGS=-fno-omit-frame-pointer -O2 -ftree-vectorize -march=amdfam10 
-mtune=native' 'USER_CXXFLAGS=-fno-omit-frame-pointer -O2 -ftree-vectorize 
-march=amdfam10 -mtune=native'


P.S. The patches need to be rebased onto current master.

-- 
Earthling Michel Dänzer|  http://www.amd.com
Libre software enthusiast  |Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 06/11] loader: Use drirc device_id parameter in complement to DRI_PRIME

2014-05-29 Thread Axel Davy

On 29/05/2014 23:31, Michel Dänzer a wrote :

On 30.05.2014 12:02, Axel Davy wrote:


Could you tell me what are your configure flags?

I tried the patches on top of febec731475a5b288cc50660b2d32780e395b950

I can still reproduce it on top of that.



and I didn't have any compiling issues, even when building gallium-egl.

Here you go:

'../configure'  '--enable-dri3' '--enable-debug' '--with-llvm-shared-libs' 
'--with-llvm-prefix=/home/daenzer/src/llvm-git/llvm/build-amd64/Release+Debug+Asserts'
 '--with-dri-searchpath=/usr/local/lib/x86_64-linux-gnu/dri' 
'--enable-texture-float' '--enable-glx-tls' '--enable-gallium-egl' 
'--enable-gallium-g3dvl' '--enable-gles1' '--enable-gles2' '--enable-opencl' 
'--enable-opencl-icd' '--enable-openvg' '--with-egl-platforms=x11,drm,wayland' 
'--enable-shared-glapi' '--with-gallium-drivers=r300,r600,radeonsi,swrast' 
'--enable-r600-llvm-compiler' '--with-dri-drivers=' 
'USER_CFLAGS=-fno-omit-frame-pointer -O2 -ftree-vectorize -march=amdfam10 
-mtune=native' 'USER_CXXFLAGS=-fno-omit-frame-pointer -O2 -ftree-vectorize 
-march=amdfam10 -mtune=native'


P.S. The patches need to be rebased onto current master.



I tried with your compile options (except for the opencl flags, didn't 
have the required libs), and it worked.


Did you try adding -lm to libloader_la_LIBADD ?

Axel Davy.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Fix Line Stipple enable bit in 3DSTATE_SF for Haswell.

2014-05-29 Thread Pavel Popov
Signed-off-by: Pavel Popov pavel.e.po...@intel.com
---
 src/mesa/drivers/dri/i965/brw_defines.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index c38e447..ed8efca 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1694,7 +1694,7 @@ enum brw_message_target {
 /* GEN7/DW1: */
 # define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12
 /* GEN7/DW2: */
-# define HSW_SF_LINE_STIPPLE_ENABLE 14
+# define HSW_SF_LINE_STIPPLE_ENABLE (1 << 14)
 
 # define GEN8_SF_SMOOTH_POINT_ENABLE (1 << 13)
 
-- 
1.8.3.2



Closed Joint Stock Company Intel A/O
Registered legal address: Krylatsky Hills Business Park, 
17 Krylatskaya Str., Bldg 4, Moscow 121614, 
Russian Federation

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Fix Line Stipple enable bit in 3DSTATE_SF for Haswell.

2014-05-29 Thread Matt Turner
On Thu, May 29, 2014 at 8:50 PM, Pavel Popov pavel.e.po...@intel.com wrote:
 Signed-off-by: Pavel Popov pavel.e.po...@intel.com
 ---
  src/mesa/drivers/dri/i965/brw_defines.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
 b/src/mesa/drivers/dri/i965/brw_defines.h
 index c38e447..ed8efca 100644
 --- a/src/mesa/drivers/dri/i965/brw_defines.h
 +++ b/src/mesa/drivers/dri/i965/brw_defines.h
 @@ -1694,7 +1694,7 @@ enum brw_message_target {
  /* GEN7/DW1: */
  # define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12
  /* GEN7/DW2: */
 -# define HSW_SF_LINE_STIPPLE_ENABLE 14
 +# define HSW_SF_LINE_STIPPLE_ENABLE (1 << 14)

  # define GEN8_SF_SMOOTH_POINT_ENABLE (1 << 13)

 --
 1.8.3.2

Nice catch!

Reviewed-by: Matt Turner matts...@gmail.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glcpp: link with tests/common.c

2014-05-29 Thread Tapani Pälli
So that prog_hash_table can use _mesa_error_no_memory function.

Signed-off-by: Tapani Pälli tapani.pa...@intel.com
---
 src/glsl/Makefile.am | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am
index fd0e837..00261fd 100644
--- a/src/glsl/Makefile.am
+++ b/src/glsl/Makefile.am
@@ -114,6 +114,7 @@ libglcpp_la_SOURCES =   
\
 
 glcpp_glcpp_SOURCES =  \
glcpp/glcpp.c   \
+   tests/common.c  \
$(top_srcdir)/src/mesa/program/prog_hash_table.c
 glcpp_glcpp_LDADD =\
libglcpp.la \
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/20] mesa: Add missing null checks into prog_hash_table.c

2014-05-29 Thread Tapani Pälli
This breaks 'make check', I've sent a patch that fixes it for me.

On 05/14/2014 08:55 PM, Juha-Pekka Heikkila wrote:
 Check calloc return values in hash_table_insert() and
 hash_table_replace()

 Signed-off-by: Juha-Pekka Heikkila juhapekka.heikk...@gmail.com
 ---
  src/mesa/program/prog_hash_table.c | 8 
  1 file changed, 8 insertions(+)

 diff --git a/src/mesa/program/prog_hash_table.c 
 b/src/mesa/program/prog_hash_table.c
 index f45ed46..2445d84 100644
 --- a/src/mesa/program/prog_hash_table.c
 +++ b/src/mesa/program/prog_hash_table.c
 @@ -142,6 +142,10 @@ hash_table_insert(struct hash_table *ht, void *data, 
 const void *key)
  struct hash_node *node;
  
  node = calloc(1, sizeof(*node));
 +if (node == NULL) {
 +   _mesa_error_no_memory(__func__);
 +   return;
 +}
  
  node-data = data;
  node-key = key;
 @@ -167,6 +171,10 @@ hash_table_replace(struct hash_table *ht, void *data, 
 const void *key)
  }
  
  hn = calloc(1, sizeof(*hn));
 +if (hn == NULL) {
 +   _mesa_error_no_memory(__func__);
 +   return false;
 +}
  
  hn-data = data;
  hn-key = key;

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/vec4: Allow writemasking on math instructions on Gen7+.

2014-05-29 Thread Matt Turner
The math instruction was Align1-only on Gen6 and we never updated this
to let it use Align16 features like writemasking on newer platforms.

total instructions in shared programs: 1686120 - 1685507 (-0.04%)
instructions in affected programs: 48593 - 47980 (-1.26%)
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 54a40dd..ee52c07 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -326,7 +326,7 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg 
dst, src_reg src)
 {
src = fix_math_operand(src);
 
-   if (dst.writemask != WRITEMASK_XYZW) {
+   if (brw->gen == 6 && dst.writemask != WRITEMASK_XYZW) {
   /* The gen6 math instruction must be align1, so we can't do
* writemasks.
*/
@@ -379,7 +379,7 @@ vec4_visitor::emit_math2_gen6(enum opcode opcode,
src0 = fix_math_operand(src0);
src1 = fix_math_operand(src1);
 
-   if (dst.writemask != WRITEMASK_XYZW) {
+   if (brw->gen == 6 && dst.writemask != WRITEMASK_XYZW) {
   /* The gen6 math instruction must be align1, so we can't do
* writemasks.
*/
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/10] glsl: parser changes for GL_ARB_explicit_uniform_location

2014-05-29 Thread Tapani Pälli
On 05/28/2014 09:41 PM, Ian Romanick wrote:
 On 05/22/2014 09:37 PM, Tapani Pälli wrote:
 On 05/21/2014 07:52 PM, Ian Romanick wrote:
 On 05/21/2014 08:11 AM, Tapani wrote:
 On 05/21/2014 05:43 PM, Tapani wrote:
 On 05/21/2014 01:27 AM, Ian Romanick wrote:
 On 05/19/2014 10:08 PM, Tapani wrote:
 On 05/19/2014 08:18 PM, Ian Romanick wrote:
 On 04/09/2014 02:56 AM, Tapani Pälli wrote:
 diff --git a/src/glsl/glsl_parser_extras.h
 b/src/glsl/glsl_parser_extras.h
 index c53c583..20879a0 100644
 --- a/src/glsl/glsl_parser_extras.h
 +++ b/src/glsl/glsl_parser_extras.h
 @@ -152,6 +152,20 @@ struct _mesa_glsl_parse_state {
  return true;
   }
+   bool check_explicit_uniform_location_allowed(YYLTYPE *locp,
 +const ir_variable
 *var)
 +   {
 +  /* Requires OpenGL 3.3 or ARB_explicit_attrib_location. */
 +  if (ctx-Version  33 
 !ctx-Extensions.ARB_explicit_attrib_location) {
 + _mesa_glsl_error(locp, this, %s explicit location
 requires 
 + GL_ARB_explicit_attrib_location extension 
 +  or OpenGL 3.3, mode_string(var));
 Many copy-and-paste bugs. :) Explicit uniform locations aren't added
 until 4.3.
 It may look like copy-paste, but the specification states "Requires
 OpenGL 3.3 or ARB_explicit_attrib_location":

 https://www.opengl.org/registry/specs/ARB/explicit_uniform_location.txt

 Using 4.3 capable driver will pass this check correctly.
 Oh right, because it relies on 3.3 or ARB_explicit_attrib_location to
 add the layout keyword.  Some comments explaining that this isn't a
 copy-and-paste bug will prevent the next person from also thinking that
 it is. :)

 But this code should check the version (and extension) bits set in the
 shader, not what's enabled in the context. How about:

 bool check_explicit_attrib_location_allowed(YYLTYPE *locp,
 const ir_variable *var)
 {
    if (!this->has_explicit_attrib_location() ||
        !this->ARB_explicit_uniform_location_enable) {
       _mesa_glsl_error(locp, this,
                        "uniform explicit location requires "
                        "GL_ARB_explicit_uniform_location and either "
                        "GL_ARB_explicit_attrib_location or GLSL 330.");
   return false;
}

return true;
 }
 Sure, this is fine by me. I'll send new patches soon.

 Or maybe fine with some changes since my piglit tests won't pass with
 this change (for those explicit attrib location is not available for
 some reason (!)), will take a look.
 Do the tests enable it via #extension?
 They enable GL_ARB_explicit_uniform_location but not
 GL_ARB_explicit_attrib_location and I think that is the way it should
 work. I don't understand why checking the existence of
 explicit_attrib_location from context is not correct way to deal with
 this? It doesn't need to be enabled in the language as layout token will
 be there also if just explicit_uniform_location is enabled.
 There are two places that we need to check extension or version related
 things in the compiler.

 1. Check that the driver supports a particular extension when a shader
 tries to enable the functionality (via #extension).  This is handled by
 the _mesa_glsl_supported_extensions table in glsl_parser_extras.cpp.

 2. Check that the shader has enabled the extension when it tries to use
 some functionality from that extension.  This is handled by either
 checking the appropriate state-foo_enable flag directly or using one of
 the state-has_foo or state-check_foo methods.  The methods are used
 for cases where a feature is enabled by multiple extensions or an
 extension and a GLSL version.

 Now, for this case...

 I think the intention of the spec language is that the layout()
 qualifier is added by either GL_ARB_explicit_attrib_location or GLSL
 3.30.  The layout qualifier applied to a uniform is further added by
 GL_ARB_explicit_uniform_location.

 So... I think an application needs (GLSL 330 ||
 GL_ARB_explicit_attrib_location) && GL_ARB_explicit_uniform_location.
 Meaning that either

 #version 330
 #extension GL_ARB_explicit_uniform_location: enable

This is how Nvidia binary driver works, it requires '#version 330'.

 or

 #version 120 // for example
 #extension GL_ARB_explicit_attrib_location: enable
 #extension GL_ARB_explicit_uniform_location: enable

I don't understand why user needs to worry about dependencies between
extensions here but maybe it's just me and I'm ok to change the check to
this approach, this is how it would work with the proposed changes applied.

 I am curious to see what other implementations do... if they do something
 different from what I have said, I'll submit a spec bug so that it's
 more clear.

 // Tapani

// Tapani

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev