date:20151118

[Mesa-dev] [PATCH 2/4] glsl: Silence unused parameter warnings

2015-11-18 Thread Ian Romanick

From: Ian Romanick 

builtin_functions.cpp:5289:52: warning: unused parameter 'num_arguments' 
[-Wunused-parameter]
   unsigned num_arguments,
^
builtin_functions.cpp:5290:52: warning: unused parameter 'flags' 
[-Wunused-parameter]
   unsigned flags)
^

Signed-off-by: Ian Romanick 
Cc: Ilia Mirkin 
---
 src/glsl/builtin_functions.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 1349444..eb438d9 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -5243,8 +5243,8 @@ builtin_builder::_image_size_prototype(const glsl_type 
*image_type,
 
 ir_function_signature *
 builtin_builder::_image_samples_prototype(const glsl_type *image_type,
-  unsigned num_arguments,
-  unsigned flags)
+  unsigned /* num_arguments */,
+  unsigned /* flags */)
 {
ir_variable *image = in_var(image_type, "image");
ir_function_signature *sig =
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeon: ensure that timing/profiling queries are suspended on flush

2015-11-18 Thread Nicolai Hähnle

The queries_suspended_for_flush flag is redundant because suspended queries
are not removed from their respective linked list.
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 13 ++---
 src/gallium/drivers/radeon/r600_pipe_common.h |  2 --
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 60be412..f03dcd9 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -27,6 +27,7 @@
 #include "r600_pipe_common.h"
 #include "r600_cs.h"
 #include "tgsi/tgsi_parse.h"
+#include "util/list.h"
 #include "util/u_draw_quad.h"
 #include "util/u_memory.h"
 #include "util/u_format_s3tc.h"
@@ -135,12 +136,10 @@ static void r600_memory_barrier(struct pipe_context *ctx, 
unsigned flags)
 void r600_preflush_suspend_features(struct r600_common_context *ctx)
 {
/* suspend queries */
-   ctx->queries_suspended_for_flush = false;
-   if (ctx->num_cs_dw_nontimer_queries_suspend) {
+   if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries))
r600_suspend_nontimer_queries(ctx);
+   if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
r600_suspend_timer_queries(ctx);
-   ctx->queries_suspended_for_flush = true;
-   }
 
ctx->streamout.suspended = false;
if (ctx->streamout.begin_emitted) {
@@ -157,10 +156,10 @@ void r600_postflush_resume_features(struct 
r600_common_context *ctx)
}
 
/* resume queries */
-   if (ctx->queries_suspended_for_flush) {
-   r600_resume_nontimer_queries(ctx);
+   if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
r600_resume_timer_queries(ctx);
-   }
+   if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries))
+   r600_resume_nontimer_queries(ctx);
 }
 
 static void r600_flush_from_st(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index f9fecdf..253d657 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -397,8 +397,6 @@ struct r600_common_context {
struct list_headactive_timer_queries;
unsignednum_cs_dw_nontimer_queries_suspend;
unsignednum_cs_dw_timer_queries_suspend;
-   /* If queries have been suspended. */
-   boolqueries_suspended_for_flush;
/* Additional hardware info. */
unsignedbackend_mask;
unsignedmax_db; /* for OQ */
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/4] glsl: Silence ignored qualifier warning

2015-11-18 Thread Ian Romanick

From: Ian Romanick 

I think the intention was to mark the "this" parameter as const, but
const goes on the other end to do that.

In file included from glsl_symbol_table.cpp:26:0:
ast.h:339:35: warning: type qualifiers ignored on function return type 
[-Wignored-qualifiers]
const bool is_single_dimension()
   ^

Signed-off-by: Ian Romanick 
Cc: Timothy Arceri 
---
 src/glsl/ast.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index 1b75234..ae76342 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -336,7 +336,7 @@ public:
   array_dimensions.push_tail(&dim->link);
}
 
-   const bool is_single_dimension()
+   bool is_single_dimension() const
{
   return this->array_dimensions.tail_pred->prev != NULL &&
  this->array_dimensions.tail_pred->prev->is_head_sentinel();
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/4] glsl: Fix off-by-one error in array size check assertion

2015-11-18 Thread Ian Romanick

From: Ian Romanick 

Apparently, this has been a bug since 2010 (c30f6e5d).

Also use ARRAY_SIZE instead of open coding it.

Signed-off-by: Ian Romanick 
Cc: Kenneth Graunke 
Cc: mesa-sta...@lists.freedesktop.org
---
 src/glsl/ir.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 8b5ba71..80cbdbf 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1425,8 +1425,7 @@ static const char * const tex_opcode_strs[] = { "tex", 
"txb", "txl", "txd", "txf
 
 const char *ir_texture::opcode_string()
 {
-   assert((unsigned int) op <=
- sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0]));
+   assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs));
return tex_opcode_strs[op];
 }
 
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/4] mesa: Don't expose GL_EXT_shader_integer_mix in GLES 1.x

2015-11-18 Thread Ian Romanick

From: Ian Romanick 

There are no shaders, so it doesn't even make sense to expose the
extension.

Signed-off-by: Ian Romanick 
Cc: Nanley Chery 
---
 src/mesa/main/extensions_table.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index d12fd9f..8685a89 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -176,7 +176,7 @@ EXT(EXT_rescale_normal  , dummy_true
 EXT(EXT_secondary_color , dummy_true   
  , GLL,  x ,  x ,  x , 1999)
 EXT(EXT_separate_shader_objects , dummy_true   
  ,  x ,  x ,  x , ES2, 2013)
 EXT(EXT_separate_specular_color , dummy_true   
  , GLL,  x ,  x ,  x , 1997)
-EXT(EXT_shader_integer_mix  , EXT_shader_integer_mix   
  , GLL, GLC, ES1,  30, 2013)
+EXT(EXT_shader_integer_mix  , EXT_shader_integer_mix   
  , GLL, GLC,  x ,  30, 2013)
 EXT(EXT_shadow_funcs, ARB_shadow   
  , GLL,  x ,  x ,  x , 2002)
 EXT(EXT_stencil_two_side, EXT_stencil_two_side 
  , GLL,  x ,  x ,  x , 2001)
 EXT(EXT_stencil_wrap, dummy_true   
  , GLL,  x ,  x ,  x , 2002)
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-stable] [PATCH 2/2] radeonsi/compute: Use the compiler's COMPUTE_PGM_RSRC* register values

2015-11-18 Thread Emil Velikov

Hi Tom,

Please flip the order of the patches and drop the now patch 1/2 from
the stable queue.

On 16 November 2015 at 20:03, Tom Stellard  wrote:
> The compiler has more information and is able to optimize the bits
> it sets in these registers.
>
> CC: 
> ---
>  src/gallium/drivers/radeonsi/si_compute.c | 37 
> ++-
>  src/gallium/drivers/radeonsi/si_shader.c  |  2 ++
>  2 files changed, 9 insertions(+), 30 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index 2d551dd..a461b2c 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -34,11 +34,6 @@
>
>  #define MAX_GLOBAL_BUFFERS 20
>
> -/* XXX: Even though we don't pass the scratch buffer via user sgprs any more
> - * LLVM still expects that we specify 4 USER_SGPRS so it can remain 
> compatible
> - * with older mesa. */
> -#define NUM_USER_SGPRS 4
> -
>  struct si_compute {
> struct si_context *ctx;
>
> @@ -238,7 +233,6 @@ static void si_launch_grid(
> uint64_t kernel_args_va;
> uint64_t scratch_buffer_va = 0;
> uint64_t shader_va;
> -   unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
> unsigned i;
> struct si_shader *shader = &program->shader;
> unsigned lds_blocks;
> @@ -366,19 +360,7 @@ static void si_launch_grid(
> si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
> si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
>
> -   si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1,
> -   /* We always use at least 3 VGPRS, these come from
> -* TIDIG_COMP_CNT.
> -* XXX: The compiler should account for this.
> -*/
> -   S_00B848_VGPRS((MAX2(3, shader->num_vgprs) - 1) / 4)
> -   /* We always use at least 4 + arg_user_sgpr_count.  The 4 
> extra
> -* sgprs are from TGID_X_EN, TGID_Y_EN, TGID_Z_EN, TG_SIZE_EN
> -* XXX: The compiler should account for this.
> -*/
> -   |  S_00B848_SGPRS(((MAX2(4 + arg_user_sgpr_count,
> -   shader->num_sgprs)) - 1) / 8)
> -   |  S_00B028_FLOAT_MODE(shader->float_mode))
> +   si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1, shader->rsrc1);
> ;
The above semicolon should be nuked as well, shouldn't it ?

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] llvm TGSI backend (WIP) questions

2015-11-18 Thread Tom Stellard

On Wed, Nov 18, 2015 at 03:53:37PM +0100, Hans de Goede wrote:
> Hi,
> 
> On 13-11-15 19:51, Tom Stellard wrote:
> > On Fri, Nov 13, 2015 at 02:46:52PM +0100, Hans de Goede wrote:
> >> Hi All,
> >>
> >> So as discussed I've started working on a TGSI backend for
> >> llvm to use as a way to get compute going on nouveau (and other gpu-s).
> >>
> >> I'm still learning all the ins and outs of llvm so I do not have
> >> much to show yet.
> >>
> >> I've rebased Francisco's (curro's) latest version on top of llvm
> >> trunk, and added a commit on top to actually get it to build with the
> >> latest trunk. So currently I'm at the point where I've just
> >> taken Francisco's code, and made it compile, no more and no less.
> >>
> >> I have a git repo with this work available here:
> >>
> >> http://cgit.freedesktop.org/~jwrdegoede/llvm/
> >>
> >> So the next step would be to test this and see if it actually
> >> does anything, questions:
> >>
> >> 1) Does anyone have a simple test case / command where I can
> >> invoke just llvm and get TGSI asm output to check ?
> >>
> >
> > The easiest way to do this is with the llc tool which ships with llvm.
> > It compiles LLVM IR to target code, which in this case is tgsi.
> > I would recommend taking one of the simple examples from
> > test/CodeGen/AMDGPU (you may need to get these from llvm trunk, not sure
> > what llvm version you are using).
> >
> > To use llc:
> >
> > llc -march=tgsi input.ll -o -
> >
> >
> > This will output TGSI.
> 
> So after some bugfixing to fix a bunch of segfaults I get:
> 
> $ bin/llc -march=tgsi ../test/CodeGen/AMDGPU/add.ll -o -
> 
> # BB#0:
>  UADDs TEMP0x, TEMP0x, 0
>  LOADgis TEMP1z, [TEMP1y]
>  UADDs TEMP1y, TEMP1y, 4
>  LOADgis TEMP1y, [TEMP1y]
>  UADDs TEMP1y, TEMP1z, TEMP1y
>  STOREgis [TEMP1x], TEMP1y
>  UADDs TEMP0x, TEMP0x, 0
>  RET
>  ENDSUB
> 
> and add.ll has:
> 
> ;FUNC-LABEL: {{^}}test1:
> ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> 
> ;SI: v_add_i32_e32 [[REG:v[0-9]+]], vcc, {{v[0-9]+, v[0-9]+}}
> ;SI-NOT: [[REG]]
> ;SI: buffer_store_dword [[REG]],
> define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
>%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
>%a = load i32, i32 addrspace(1)* %in
>%b = load i32, i32 addrspace(1)* %b_ptr
>%result = add i32 %a, %b
>store i32 %result, i32 addrspace(1)* %out
>ret void
> }
> 
> So the generated code for test1 resembles the input somewhat but is in no way 
> correct,
> e.g. I do not understand why it is assuming that both TEMP0x and TEMP1z 
> contain the
> address of the array with the 2 input integers. Nor do I understand why it is 
> using
> TEMP1z and TEMP1y as sources for the UADD, where it has been doing the LOAD-s 
> to
> TEMP0x and TEMP1y
> 

The placement of inputs into registers is controlled by the calling
convention, which is implemented in TGSIISelLowering.cpp and a file
probably called something like TGSICallingConv.td.

Maybe I'm reading the assembly wrong, but it looks like values are being
loaded into TEMP1z and TEMP1y not TEMP0x and TEMP1y.

-Tom

> And then we've function test2 in add.ll
> 
> ;FUNC-LABEL: {{^}}test2:
> ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> 
> ;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
> ;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
> 
> define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) 
> {
>%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
>%a = load <2 x i32>, <2 x i32> addrspace(1)* %in
>%b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
>%result = add <2 x i32> %a, %b
>store <2 x i32> %result, <2 x i32> addrspace(1)* %out
>ret void
> }
> 
> Which completely makes the tgsi backend unhappy:
> 
> LLVM ERROR: Cannot select: t43: i32,ch = load 
> t45:1, FrameIndex:i32<0>, undef:i32
> t41: i32 = FrameIndex<0>
> t8: i32 = undef
> In function: test2
> 
> Any hints on where to start looking with fixing these issues would be much
> appreciated.
> 
> Regards,
> 
> Hans
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965/fs: Replace nested ternary with if ladder.

2015-11-18 Thread Francisco Jerez

Matt Turner  writes:

> Since the types of the expression were
>
>bool ? src_reg : (bool ? brw_reg : brw_reg)
>
> the result of the second (nested) ternary would be implicitly
> converted to a src_reg by the src_reg(struct brw_reg) constructor. I.e.,
>
>bool ? src_reg : src_reg(bool ? brw_reg : brw_reg)
>
> In the next patch, I make backend_reg (the parent of src_reg) inherit
> from brw_reg, which changes this expression to return brw_reg, which
> throws away any fields that exist in the classes derived from brw_reg.
> I.e.,
>
>src_reg(bool ? brw_reg(src_reg) : bool ? brw_reg : brw_reg)
>

The fundamental problem here has nothing to do with ternary operators,
but with the fact that you made backend_reg inherit from brw_reg even
though there is no is-a relationship -- More precisely, the Liskov
substitution principle doesn't hold: Pass a valid program written in
terms of brw_reg (e.g. brw_MOV()) an fs_reg (or similarly any of the
vec4 register objects) and you've got a bug (AKA object slicing).  The
reason is that fs_regs have additional structure and restrictions that a
valid brw_reg program won't take into account (e.g. additional offset,
stride, reladdr), and even the methods shared with the base brw_reg
class have subtly different semantics.  A proper conversion from fs_reg
to brw_reg amounts to more than just slicing off the additional
structure of the fs_reg (c.f. brw_reg_from_fs_reg()), which points to
there being an implemented-in-terms-of relationship rather than an is-a
relationship.

The typical approach to solve this problem is use private inheritance
and explicitly expose the fields from the base class you want to be
public using "using" directives.  That will prevent implicit (and
generally incorrect) conversion of register subclasses into brw_reg.

Another somewhat more wordy approach is use aggregation and expose any
interesting fields from the brw_reg using accessors, or use completely
unrelated representations for backend_reg and brw_reg and perform the
conversion manually (kind of like was done previously, but replacing the
fixed_hw_reg thing with precisely the subset of brw_reg fields required
to represent HW_REGs and immediates orthogonally with respect to the
remaining backend_reg fields).

> Generally this code was gross, and wasn't actually shorter or easier to
> read than an if ladder.

Generally I find it gross to split up a pure expression into separate
statements even though there's no control-flow relationship intended.
The widely used pattern:

| condition0 ? expression0 :
| condition1 ? expression1 :
| /* ... */
| conditionN ? expressionN :
| default-expression

is at least as readable as the equivalent if-ladder (although this point
is highly subjective) and frequently saves you from introducing mutable
variables that only ever need to be assigned once and after that point
remain constant even though they're not explicitly marked as constants
(which obscures the dataflow of the program -- This last point is
probably not subjective at all, but it doesn't apply to this particular
example).  In other cases chained ternary operators can simplify code by
allowing the factorization of duplicated code like:

| p ? f(x) :
| q ? f(y) :
| /* ... */ :
| f(z)

  ->

| f(p ? x :
|   q ? y :
|   /* ... */ :
|   z)

In this particular case the duplicated "f" factor you added is just a
trivial return statement so this benefit is not too important in this
case either, and you're left with the purely subjective matter of which
is the most readable -- In doubt I tend to prefer ternary operators
instinctively when the alternatives are pure expressions simply for
consistency and brevity.  You may feel otherwise and I'm not going to
complain about this change because the difference is purely subjective
-- But for that same reason calling this "gross" seems like a rather
gross overstatement to me.

> ---
>  src/mesa/drivers/dri/i965/brw_fs_builder.h | 13 +++--
>  1 file changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h 
> b/src/mesa/drivers/dri/i965/brw_fs_builder.h
> index f121f34..d5763f6 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
> @@ -224,12 +224,13 @@ namespace brw {
>src_reg
>sample_mask_reg() const
>{
> - const bool uses_kill =
> -(shader->stage == MESA_SHADER_FRAGMENT &&
> - ((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill);
> - return (shader->stage != MESA_SHADER_FRAGMENT ? src_reg(0xffff) :
> - uses_kill ? brw_flag_reg(0, 1) :
> - retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD));
> + if (shader->stage != MESA_SHADER_FRAGMENT) {
> +return src_reg(0xffff);
> + } else if (((brw_wm_prog_data 
> *)shader->stage_prog_data)->uses_kill) {
> +return brw_flag_reg(0, 1);
> + } else {
> +

[Mesa-dev] [Bug 92985] Mac OS X build error "ar: no archive members specified"

2015-11-18 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=92985

--- Comment #3 from Emil Velikov  ---
(In reply to Martin Peres from comment #2)
> (In reply to Emil Velikov from comment #1)
> > I'm assuming that this fails as said ar expects to create a non empty
> > archive. 
> > Thus moving the noinst_LTLIBRARIES += libloader_dri3_helper.la into the if
> > HAVE_DRI3 section should fix things.
> > 
> > Seems that we're also missing the XCB_DRI3_CFLAGS in the
> > src/loader/Makefile.am
> 
> Pretty sure I tested that creating an empty libloader_dri3_helper.la was no
> problem on Linux. It may be on Mac OS.
> 
Same here - things work fine on my Linux boxes, and from the log this is the
only thing which comes to mind.

> In any case, I will add it to my list of stuff to do tomorrow, after
> figuring out why kwin fails when using EGL (spent some time on it today, the
> EGLConfig is empty(??)) and fixed the export of the symbols (unless Emil
> wants to do it first).
Mind sending the symbols fixes to the ML - I'll take a look at adding a test to
`make check' to prevent this from happening in the future. Alongside it I'll
look at this bug as well.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/5] i965/nir: use vectorization for non-scalar stages

2015-11-18 Thread Jason Ekstrand

On Tue, Nov 17, 2015 at 11:41 PM, Connor Abbott  wrote:
> On Mon, Nov 16, 2015 at 11:00 AM, Jason Ekstrand  wrote:
>> On Sat, Nov 14, 2015 at 6:59 PM, Connor Abbott  wrote:
>>> Shader-db results on bdw with INTEL_DEBUG=vec4:
>>>
>>> total instructions in shared programs: 1634044 -> 1612936 (-1.29%)
>>> instructions in affected programs: 802502 -> 781394 (-2.63%)
>>> helped: 5036
>>> HURT: 1442
>>>
>>> total cycles in shared programs: 9397790 -> 9355382 (-0.45%)
>>> cycles in affected programs: 5078600 -> 5036192 (-0.84%)
>>> helped: 3875
>>> HURT: 2554
>>>
>>> LOST:   0
>>> GAINED: 0
>>>
>>> Most of the hurt programs seem to be because we generate extra MOV's due
>>> to vectorizing things. For example, in
>>> shaders/non-free/steam/anomaly-2/158.shader_test, this:
>>>
>>> add(8)  g116<1>.xyF g12<4,4,1>.xyyyF g1.4<0,4,1>.xyyyF { 
>>> align16 NoDDClr 1Q };
>>> add(8)  g117<1>.xyF g12<4,4,1>.xyyyF g1.4<0,4,1>.zwwwF { 
>>> align16 NoDDClr 1Q };
>>> add(8)  g116<1>.zwF g12<4,4,1>.xxxyF -g1.4<0,4,1>.xxxyF { 
>>> align16 NoDDChk 1Q };
>>> add(8)  g117<1>.zwF g12<4,4,1>.xxxyF -g1.4<0,4,1>.zzzwF { 
>>> align16 NoDDChk 1Q };
>>>
>>> Turns into this:
>>>
>>> add(8)  g13<1>F g12<4,4,1>.xyxyF g1.4<0,4,1>F   { align16 
>>> 1Q };
>>> add(8)  g14<1>F g12<4,4,1>.xyxyF -g1.4<0,4,1>F  { align16 
>>> 1Q };
>>> mov(8)  g116<1>.xyD g13<4,4,1>.xyyyD{ align16 
>>> NoDDClr 1Q };
>>> mov(8)  g117<1>.xyD g13<4,4,1>.zwwwD{ align16 
>>> NoDDClr 1Q };
>>> mov(8)  g116<1>.zwD g14<4,4,1>.xxxyD{ align16 
>>> NoDDChk 1Q };
>>> mov(8)  g117<1>.zwD g14<4,4,1>.zzzwD{ align16 
>>> NoDDChk 1Q };
>>>
>>> So we eliminated two add's, but then had to introduce four mov's to
>>> transpose the result. I don't think there's much we can do about this at
>>> the NIR level, unfortunately.
>>
>> Given the shader-db numbers above, I think we can probably eat the
>> hurt programs.  Would you mind cherry-picking back onto a time when we
>> had GLSL IR and doing a GLSL IR vs. NIR comparison with this series?
>> This is one of the places we were still hurting so it would be good to
>> know how it changes the picture.  Not that it *really* matters at this
>> point...
>
> So I rebased the series onto right before we switched to NIR by
> default. For non-NIR vs. NIR, the numbers were:
>
> total instructions in shared programs: 1848629 -> 1647133 (-10.90%)
> instructions in affected programs: 1663319 -> 1461711 (-12.12%)
> total loops in shared programs:223 -> 222 (-0.45%)
> helped:14667
> HURT:  1416
> GAINED:0
> LOST:  0
>
> and for non-NIR vs. NIR + vectorization:
>
> total instructions in shared programs: 1848629 -> 1619388 (-12.40%)
> instructions in affected programs: 1696613 -> 1467260 (-13.52%)
> total loops in shared programs:223 -> 222 (-0.45%)
> helped:15076
> HURT:  1378
> GAINED:0
> LOST:  0
>
> so, indeed, it does seem to get rid of some hurt programs. My
> suspicion, though, is that it's a case of "well, NIR hurt, but the
> gain from vectorizing offset the (unrelated) problem so that now it
> helps." We don't ever split up vectorized operations in NIR (except
> when lowering to scalar, ofc) so NIR shouldn't be doing anything
> harmful that this pass would help.

Mostly, I wanted to see how big the numbers were :-)  Also, we did see
some vectorization issues in some of the shaders that were hurt.
--Jason

>>
>>> Signed-off-by: Connor Abbott 
>>> ---
>>>  src/mesa/drivers/dri/i965/brw_nir.c | 8 
>>>  1 file changed, 8 insertions(+)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c 
>>> b/src/mesa/drivers/dri/i965/brw_nir.c
>>> index fe5cad4..29cafe6 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_nir.c
>>> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
>>> @@ -198,6 +198,14 @@ nir_optimize(nir_shader *nir, bool is_scalar)
>>>nir_validate_shader(nir);
>>>progress |= nir_opt_cse(nir);
>>>nir_validate_shader(nir);
>>> +
>>> +  if (!is_scalar) {
>>> + progress |= nir_opt_vectorize(nir);
>>> + nir_validate_shader(nir);
>>> + progress |= nir_copy_prop(nir);
>>> + nir_validate_shader(nir);
>>> +  }
>>> +
>>>progress |= nir_opt_peephole_select(nir);
>>>nir_validate_shader(nir);
>>>progress |= nir_opt_algebraic(nir);
>>> --
>>> 2.4.3
>>>
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/4] glsl: Silence unused parameter warnings

2015-11-18 Thread Ilia Mirkin

Reviewed-by: Ilia Mirkin 

On Wed, Nov 18, 2015 at 12:44 PM, Ian Romanick  wrote:
> From: Ian Romanick 
>
> builtin_functions.cpp:5289:52: warning: unused parameter 'num_arguments' 
> [-Wunused-parameter]
>unsigned num_arguments,
> ^
> builtin_functions.cpp:5290:52: warning: unused parameter 'flags' 
> [-Wunused-parameter]
>unsigned flags)
> ^
>
> Signed-off-by: Ian Romanick 
> Cc: Ilia Mirkin 
> ---
>  src/glsl/builtin_functions.cpp | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
> index 1349444..eb438d9 100644
> --- a/src/glsl/builtin_functions.cpp
> +++ b/src/glsl/builtin_functions.cpp
> @@ -5243,8 +5243,8 @@ builtin_builder::_image_size_prototype(const glsl_type 
> *image_type,
>
>  ir_function_signature *
>  builtin_builder::_image_samples_prototype(const glsl_type *image_type,
> -  unsigned num_arguments,
> -  unsigned flags)
> +  unsigned /* num_arguments */,
> +  unsigned /* flags */)
>  {
> ir_variable *image = in_var(image_type, "image");
> ir_function_signature *sig =
> --
> 2.1.0
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] glsl: Fix off-by-one error in array size check assertion

2015-11-18 Thread Kenneth Graunke

On Wednesday, November 18, 2015 09:44:34 AM Ian Romanick wrote:
> From: Ian Romanick 
> 
> Apparently, this has been a bug since 2010 (c30f6e5d).
> 
> Also use ARRAY_SIZE instead of open coding it.
> 
> Signed-off-by: Ian Romanick 
> Cc: Kenneth Graunke 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/glsl/ir.cpp | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
> index 8b5ba71..80cbdbf 100644
> --- a/src/glsl/ir.cpp
> +++ b/src/glsl/ir.cpp
> @@ -1425,8 +1425,7 @@ static const char * const tex_opcode_strs[] = { "tex", 
> "txb", "txl", "txd", "txf
>  
>  const char *ir_texture::opcode_string()
>  {
> -   assert((unsigned int) op <=
> -   sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0]));
> +   assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs));
> return tex_opcode_strs[op];
>  }
>  
> 

Good catch!

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/5] util/set: don't compare against deleted entries

2015-11-18 Thread Jason Ekstrand

On Sat, Nov 14, 2015 at 6:59 PM, Connor Abbott  wrote:
> Not sure how this wasn't already caught by valgrind, but it fixes an
> issue with the vectorizer.

Ugh... I'm getting tired of fixing these bugs.

> Signed-off-by: Connor Abbott 

Both are

Cc: "11.0" 
Reviewed-by: Jason Ekstrand 

> ---
>  src/util/set.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/util/set.c b/src/util/set.c
> index f01f869..331ff58 100644
> --- a/src/util/set.c
> +++ b/src/util/set.c
> @@ -282,7 +282,8 @@ set_add(struct set *ht, uint32_t hash, const void *key)
> * If freeing of old keys is required to avoid memory leaks,
> * perform a search before inserting.
> */
> -  if (entry->hash == hash &&
> +  if (entry_is_present(entry) &&
> +  entry->hash == hash &&
>ht->key_equals_function(key, entry->key)) {
>   entry->key = key;
>   return entry;
> --
> 2.4.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-stable] [PATCH] nir: fix typo in idiv lowering, causing large-udiv-udiv failures

2015-11-18 Thread Emil Velikov

Hi Ilia,

On 11 November 2015 at 00:28, Ilia Mirkin  wrote:
> On Tue, Nov 10, 2015 at 7:24 PM, Connor Abbott  wrote:
>> On Tue, Nov 10, 2015 at 7:02 PM, Ilia Mirkin  wrote:
>>> On Tue, Nov 10, 2015 at 6:44 PM, Eric Anholt  wrote:
>>>> Ilia Mirkin  writes:
>>>>
>>>>> In nv50, and in the python script that Rob circulated, we do:
>>>>>
>>>>>    bld.mkCmp(OP_SET, CC_GE, TYPE_U32, (s = bld.getSSA()), TYPE_U32, m, b);
>>>>>
>>>>> Do the same in the nir div lowering pass. This fixes the large-udiv-udiv
>>>>> piglit tests on freedreno.
>>>>
>>>> I assume you meant *-op-div-large-uint-uint.shader_test.
>>>
>>> Yes.
>>>
>>>>
>>>> vc4 doesn't have uge yet, but I've got a patch to add it and it does
>>>> fix one subtest.  What this lowering pass is actually doing has never
>>>> really made sense to me, but it works, so:
>>>>
>>>> Acked-by: Eric Anholt 
>>>
>>> It's a magical sequence of non-sensical operations which appear to
>>> produce the proper result with high probability... what's so confusing
>>> about that? :)
>>>
>>> More seriously, I think there are Newton-Raphson overtones in what
>>> it's doing, but I never fully traced it down. It kind of loses me
>>> after subtracting 2 from the integer representation of the float bits.
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>> Where did you get it from? Is there a paper somewhere explaining it? I
>> sort of have a morbid curiosity, perhaps because I got sucked into
>> implementing division/sqrt/rsq for doubles.
>
> I made the old freedreno impl based on nv50's div lowering code.
> There's similarly weird code for r600, but I don't know if the r600 or
> nv50 code came first, or where they came from. FWIW the nvc0 div logic
> is quite different (implemented as a function call, which does rather
> different things... I think). I also don't know where that came from.
>
Just checking that this hasn't fallen through the cracks. Afaics the
only 'issue' with going forward is that no one has reviewed the patch
yet, is that right ?


Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Introducing OpenSWR: High performance software rasterizer

2015-11-18 Thread Emil Velikov

Hi Tim,

I have no objections against getting this merged, although here are a
couple of things that should be sorted. Some of these are just
reiteration from others:

 - First and foremost - please base your work against master. Mesa,
like most other open-source projects, tries to keep features out of
bugfix releases. As such basing things against 11.0 is not suitable.

 - Further combinatorial explosion of build configurations - with
internal/external core, swr-arch, etc. Some of these can (should?) be
nuked, although further comments will follow as patch(es) hit the
mailing list.

 - Using llvm's C++ interface, building against multiple LLVM
versions. If openswr supports only limited versions of llvm, then
the build should bail out accordingly - more comments/suggestions as
patch(es) hit the ML.

 - Will patches porting core openswr functionality from the internal
tree be part of the public discussions ? The VMWare people have done a
great thing trying to keep things open, and people have, on the rare
occasion, found nitpicks in their patches.

 - And last but not least - please split patches sensibly (for your
submission and further work). The "Initial public Mesa+SWR" touches
files in quite a few different places.

Mildly related - I'll be resending/merging a series which reworks
things in src/gallium/auxiliary/target-helpers/ so things might clash
as you rebase your work.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3] llvmpipe: disable VSX in ppc due to LLVM PPC bug

2015-11-18 Thread Jose Fonseca


On 18/11/15 18:52, Oded Gabbay wrote:

On Tue, Nov 17, 2015 at 10:35 PM, Oded Gabbay  wrote:

This patch disables the use of VSX instructions, as they cause some
piglit tests to fail

For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7

With this patch, ppc64le reaches parity with x86-64 as far as piglit test
suite is concerned.

v2:
- Added check that we have at least LLVM 3.4
- Added the LLVM bug URL as a comment in the code

v3:

- Only disable VSX if Altivec is supported, because if Altivec support
is missing, then VSX support doesn't exist anyway.

- Change original patch description.

Signed-off-by: Oded Gabbay 
Cc: "11.0" 
---
  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 9 +
  1 file changed, 9 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 7bda118..3ee708f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -536,6 +536,15 @@ 
lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,

  #if defined(PIPE_ARCH_PPC)
 MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
+#if HAVE_LLVM >= 0x0304
+   /*
+* Make sure VSX instructions are disabled
+* See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7
+*/
+   if (util_cpu_caps.has_altivec) {
+  MAttrs.push_back("-vsx");
+   }
+#endif
  #endif

 builder.setMAttrs(MAttrs);
--
2.5.0



Hi,

I assume the V3 fixes your comments.
Could you please give me an r-b ?

Thanks,

Oded


Thanks.

Reviewed-by: Jose Fonseca 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-stable] [PATCH] mesa: fix error type for GetFramebufferAttachmentParameteriv

2015-11-18 Thread Emil Velikov

Hi Tapani,

On 28 October 2015 at 13:27, Tapani Pälli  wrote:
> On 10/27/2015 06:42 PM, Ian Romanick wrote:
>>
>> On 10/27/2015 12:11 AM, Tapani Pälli wrote:
>>>
>>> Fixes following failing dEQP test:
>>> dEQP-GLES3.functional.fbo.api.attachment_query_empty_fbo
>>>
>>> Signed-off-by: Tapani Pälli 
>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92649
>>> Cc: "11.0" 
>>> ---
>>>   src/mesa/main/fbobject.c | 5 +++--
>>>   1 file changed, 3 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
>>> index fe6bdc2..d91fb4a 100644
>>> --- a/src/mesa/main/fbobject.c
>>> +++ b/src/mesa/main/fbobject.c
>>> @@ -3540,8 +3540,9 @@ _mesa_get_framebuffer_attachment_parameter(struct
>>> gl_context *ctx,
>>>  const struct gl_renderbuffer_attachment *att;
>>>  GLenum err;
>>>   -   /* The error differs in GL and GLES. */
>>> -   err = _mesa_is_desktop_gl(ctx) ? GL_INVALID_OPERATION :
>>> GL_INVALID_ENUM;
>>> +   /* The error differs between GL/GLES3 and GLES 2.0. */
>>
>> Could we get quotations from the various specs here? That would have
>> saved me some time. The commit log for 000896c0 has the GLES2 reference.
>> For GLES 3.0,
>>
>> Section 6.1.13 (Framebuffer Object Queries) of the OpenGL ES 3.0.4 spec
>> says:
>>
>> "If the value of FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE is NONE, no
>> framebuffer is bound to target. In this case querying pname
>> FRAMEBUFFER_ATTACHMENT_OBJECT_NAME will return zero, and all other
>> queries will generate an INVALID_OPERATION error."
>>
>> The GLES2 reference in the commit above is Section 6.1.3 (Enumerated
>> Queries).
>>
>> I feel a bit conflicted about this change.  Section F.2 (Differences in
>> Runtime Behavior) of the OpenGL ES 3.0.4 spec lists three subtle (but
>> possibly important) differences between ES2 and ES3.  This list does
>> *not* include changed errors.  I don't have any specific recollection
>> about these changes, but this causes me to believe that these are
>> corrections to ES2.
>>
>> What do the CTS and dEQP do if you...
>>
>> - Always generate GL_INVALID_OPERATION.
>>
>> - Only return an ES2 context.
>>
>> If all the GLES2 conformance tests and the dEQP-GLES2 tests still pass,
>> I would be inclined to just always do GL_INVALID_OPERATION.
>
>
> Yes it seems we can do that, there are no CTS or dEQP regressions when we always
> return GL_INVALID_OPERATION.
>
Doesn't seem like this patch landed in master yet. Based on the above
discussion should we drop this off the list and look into a fix
elsewhere in mesa or we're just going to apply the behaviour for all
GL/GLES versions  ?

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3] llvmpipe: disable VSX in ppc due to LLVM PPC bug

2015-11-18 Thread Roland Scheidegger

Am 17.11.2015 um 21:35 schrieb Oded Gabbay:
> This patch disables the use of VSX instructions, as they cause some
> piglit tests to fail
> 
> For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7
> 
> With this patch, ppc64le reaches parity with x86-64 as far as piglit test
> suite is concerned.
> 
> v2:
> - Added check that we have at least LLVM 3.4
> - Added the LLVM bug URL as a comment in the code
> 
> v3:
> 
> - Only disable VSX if Altivec is supported, because if Altivec support
> is missing, then VSX support doesn't exist anyway.
> 
> - Change original patch description.
> 
> Signed-off-by: Oded Gabbay 
> Cc: "11.0" 
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
> b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> index 7bda118..3ee708f 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> @@ -536,6 +536,15 @@ 
> lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
>  
>  #if defined(PIPE_ARCH_PPC)
> MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
> +#if HAVE_LLVM >= 0x0304
> +   /*
> +* Make sure VSX instructions are disabled
> +* See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7
> +*/
> +   if (util_cpu_caps.has_altivec) {
> +  MAttrs.push_back("-vsx");
> +   }
> +#endif
>  #endif
>  
> builder.setMAttrs(MAttrs);
> 

Reviewed-by: Roland Scheidegger 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] [v2] i965: Add lossless compression to surface format table

2015-11-18 Thread Chad Versace

On Tue 17 Nov 2015, Ben Widawsky wrote:
> Background: Prior to Skylake and since Ivybridge Intel hardware has had the
> ability to use a MCS (Multisample Control Surface) as auxiliary data in
> "compression" operations on the surface. This reduces memory bandwidth.  This
> hardware was used for either MSAA compression or fast clear operations.  On
> Gen8, a similar mechanism exists to allow the hiz buffer to be sampled from, 
> and
> therefore this feature is sometimes referred to more generally as "AUX 
> buffers".
> 
> Skylake adds the ability to have the display engine directly source compressed
> surfaces on top of the ability to sample from them. Inference dictates that
> enabling this display feature adds a restriction to the formats which could
> actually be compressed. The current set of surfaces seems to be a subset as
> compared to previous gens (see the next patch). Also, if I had to guess I 
> would
> guess that future gens add support for more surface formats. To make handling
> this a bit easier to read, and more future proof, the support for this is 
> moved
> into the surface formats table.
> 
> Along with the modifications to the table, a helper function is also provided 
> to
> determine if a surface is CCS compatible.  Because fast clears are currently
^^^
Should say "CCS_E".

> disabled on SKL, we can plumb the helper all the way through here, and not
> actually have anything break.
> 
> The logic in the table works a bit differently than the other columns in the
> table and therefore deserves a small mention. For most other features, the GEN
> which began implementing it is set, and it is assumed future gens also support
> this. For this feature, GEN9 actually eliminates support for certain formats. 
> We
> could use this column to determine support for the similar feature on older
> generation hardware. Aside from that being an error prone task which is
> unrelated to enabling this on GEN9, it becomes somewhat tricky to implement
> because of the fact that surface format support diminishes. You'd probably 
> want
> another column to cleanly implement it.

Does the above paragraph still apply to the table's ccs_e column?
As I understand your patch series, the ccs_e column behaves identically to
all other columns:

feature_is_supported == (10 * gen >= table[format].feature)

The patch's diff looks good to me. My only remaining questions/issues
with the patch are the ones stated in this message.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3] llvmpipe: disable VSX in ppc due to LLVM PPC bug

2015-11-18 Thread Oded Gabbay

On Tue, Nov 17, 2015 at 10:35 PM, Oded Gabbay  wrote:
> This patch disables the use of VSX instructions, as they cause some
> piglit tests to fail
>
> For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7
>
> With this patch, ppc64le reaches parity with x86-64 as far as piglit test
> suite is concerned.
>
> v2:
> - Added check that we have at least LLVM 3.4
> - Added the LLVM bug URL as a comment in the code
>
> v3:
>
> - Only disable VSX if Altivec is supported, because if Altivec support
> is missing, then VSX support doesn't exist anyway.
>
> - Change original patch description.
>
> Signed-off-by: Oded Gabbay 
> Cc: "11.0" 
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
> b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> index 7bda118..3ee708f 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> @@ -536,6 +536,15 @@ 
> lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
>
>  #if defined(PIPE_ARCH_PPC)
> MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
> +#if HAVE_LLVM >= 0x0304
> +   /*
> +* Make sure VSX instructions are disabled
> +* See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7
> +*/
> +   if (util_cpu_caps.has_altivec) {
> +  MAttrs.push_back("-vsx");
> +   }
> +#endif
>  #endif
>
> builder.setMAttrs(MAttrs);
> --
> 2.5.0
>

Hi,

I assume the V3 fixes your comments.
Could you please give me an r-b ?

Thanks,

   Oded
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-stable] [PATCH] nir: fix typo in idiv lowering, causing large-udiv-udiv failures

2015-11-18 Thread Ilia Mirkin

On Wed, Nov 18, 2015 at 2:03 PM, Emil Velikov  wrote:
> Hi Ilia,
>
> On 11 November 2015 at 00:28, Ilia Mirkin  wrote:
>> On Tue, Nov 10, 2015 at 7:24 PM, Connor Abbott  wrote:
>>> On Tue, Nov 10, 2015 at 7:02 PM, Ilia Mirkin  wrote:
 On Tue, Nov 10, 2015 at 6:44 PM, Eric Anholt  wrote:
> Ilia Mirkin  writes:
>
>> In nv50, and in the python script that Rob circulated, we do:
>>
>>bld.mkCmp(OP_SET, CC_GE, TYPE_U32, (s = bld.getSSA()), TYPE_U32, m, 
>> b);
>>
>> Do the same in the nir div lowering pass. This fixes the large-udiv-udiv
>> piglit tests on freedreno.
>
> I assume you meant *-op-div-large-uint-uint.shader_test.

 Yes.

>
> vc4 doesn't have uge yet, but I've got a patch to add it and it does
> fix one subtest.  What this lowering pass is actually doing has never
> really made sense to me, but it works, so:
>
> Acked-by: Eric Anholt 

 It's a magical sequence of non-sensical operations which appear to
 produce the proper result with high probability... what's so confusing
 about that? :)

 More seriously, I think there are Newton-Raphson overtones in what
 it's doing, but I never fully traced it down. It kind of loses me
 after subtracting 2 from the integer representation of the float bits.
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>>
>>> Where did you get it from? Is there a paper somewhere explaining it? I
>>> sort of have a morbid curiosity, perhaps because I got sucked into
>>> implementing division/sqrt/rsq for doubles.
>>
>> I made the old freedreno impl based on nv50's div lowering code.
>> There's similarly weird code for r600, but I don't know if the r600 or
>> nv50 code came first, or where they came from. FWIW the nvc0 div logic
>> is quite different (implemented as a function call, which does rather
>> different things... I think). I also don't know where that came from.
>>
> Just checking that this hasn't fallen through the cracks. Afaics the
> only 'issue' with going forward is that no one has reviewed the patch
> yet, is that right ?

Nope. Rob's going to push it out along with a bunch of freedreno stuff "soon".

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/4] mesa: Don't expose GL_EXT_shader_integer_mix in GLES 1.x

2015-11-18 Thread Jason Ekstrand

Reviwed-by: Jason Ekstrand 

On Wed, Nov 18, 2015 at 9:44 AM, Ian Romanick  wrote:
> From: Ian Romanick 
>
> There are no shaders, so it doesn't even make sense to expose the
> extension.
>
> Signed-off-by: Ian Romanick 
> Cc: Nanley Chery 
> ---
>  src/mesa/main/extensions_table.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/mesa/main/extensions_table.h 
> b/src/mesa/main/extensions_table.h
> index d12fd9f..8685a89 100644
> --- a/src/mesa/main/extensions_table.h
> +++ b/src/mesa/main/extensions_table.h
> @@ -176,7 +176,7 @@ EXT(EXT_rescale_normal  , dummy_true
>  EXT(EXT_secondary_color , dummy_true 
> , GLL,  x ,  x ,  x , 1999)
>  EXT(EXT_separate_shader_objects , dummy_true 
> ,  x ,  x ,  x , ES2, 2013)
>  EXT(EXT_separate_specular_color , dummy_true 
> , GLL,  x ,  x ,  x , 1997)
> -EXT(EXT_shader_integer_mix  , EXT_shader_integer_mix 
> , GLL, GLC, ES1,  30, 2013)
> +EXT(EXT_shader_integer_mix  , EXT_shader_integer_mix 
> , GLL, GLC,  x ,  30, 2013)
>  EXT(EXT_shadow_funcs, ARB_shadow 
> , GLL,  x ,  x ,  x , 2002)
>  EXT(EXT_stencil_two_side, EXT_stencil_two_side   
> , GLL,  x ,  x ,  x , 2001)
>  EXT(EXT_stencil_wrap, dummy_true 
> , GLL,  x ,  x ,  x , 2002)
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-stable] [PATCH] nir: fix typo in idiv lowering, causing large-udiv-udiv failures

2015-11-18 Thread Rob Clark

On Wed, Nov 18, 2015 at 2:06 PM, Ilia Mirkin  wrote:
> On Wed, Nov 18, 2015 at 2:03 PM, Emil Velikov  
> wrote:
>> Hi Ilia,
>>
>> On 11 November 2015 at 00:28, Ilia Mirkin  wrote:
>>> On Tue, Nov 10, 2015 at 7:24 PM, Connor Abbott  wrote:
 On Tue, Nov 10, 2015 at 7:02 PM, Ilia Mirkin  wrote:
> On Tue, Nov 10, 2015 at 6:44 PM, Eric Anholt  wrote:
>> Ilia Mirkin  writes:
>>
>>> In nv50, and in the python script that Rob circulated, we do:
>>>
>>>bld.mkCmp(OP_SET, CC_GE, TYPE_U32, (s = bld.getSSA()), TYPE_U32, m, 
>>> b);
>>>
>>> Do the same in the nir div lowering pass. This fixes the large-udiv-udiv
>>> piglit tests on freedreno.
>>
>> I assume you meant *-op-div-large-uint-uint.shader_test.
>
> Yes.
>
>>
>> vc4 doesn't have uge yet, but I've got a patch to add it and it does
>> fix one subtest.  What this lowering pass is actually doing has never
>> really made sense to me, but it works, so:
>>
>> Acked-by: Eric Anholt 
>
> It's a magical sequence of non-sensical operations which appear to
> produce the proper result with high probability... what's so confusing
> about that? :)
>
> More seriously, I think there are Newton-Raphson overtones in what
> it's doing, but I never fully traced it down. It kind of loses me
> after subtracting 2 from the integer representation of the float bits.
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

 Where did you get it from? Is there a paper somewhere explaining it? I
 sort of have a morbid curiosity, perhaps because I got sucked into
 implementing division/sqrt/rsq for doubles.
>>>
>>> I made the old freedreno impl based on nv50's div lowering code.
>>> There's similarly weird code for r600, but I don't know if the r600 or
>>> nv50 code came first, or where they came from. FWIW the nvc0 div logic
>>> is quite different (implemented as a function call, which does rather
>>> different things... I think). I also don't know where that came from.
>>>
>> Just checking that this hasn't fallen through the cracks. Afaics the
>> only 'issue' with going forward is that no one has reviewed the patch
>> yet, is that right ?
>
> Nope. Rob's going to push it out along with a bunch of freedreno stuff "soon".

and it's pushed

(along w/ gl31 for a3xx/a4xx :-))

>   -ilia
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] glsl: Fix off-by-one error in array size check assertion

2015-11-18 Thread Ian Romanick

On 11/18/2015 10:05 AM, Kenneth Graunke wrote:
> On Wednesday, November 18, 2015 09:44:34 AM Ian Romanick wrote:
>> From: Ian Romanick 
>>
>> Apparently, this has been a bug since 2010 (c30f6e5d).
>>
>> Also use ARRAY_SIZE instead of open coding it.
>>
>> Signed-off-by: Ian Romanick 
>> Cc: Kenneth Graunke 
>> Cc: mesa-sta...@lists.freedesktop.org
>> ---
>>  src/glsl/ir.cpp | 3 +--
>>  1 file changed, 1 insertion(+), 2 deletions(-)
>>
>> diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
>> index 8b5ba71..80cbdbf 100644
>> --- a/src/glsl/ir.cpp
>> +++ b/src/glsl/ir.cpp
>> @@ -1425,8 +1425,7 @@ static const char * const tex_opcode_strs[] = { "tex", 
>> "txb", "txl", "txd", "txf
>>  
>>  const char *ir_texture::opcode_string()
>>  {
>> -   assert((unsigned int) op <=
>> -  sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0]));
>> +   assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs));
>> return tex_opcode_strs[op];
>>  }
>>  
> 
> Good catch!

I was adding a new texture opcode, and I thought it was weird that the
INTEL_DEBUG=fs dumps showed "(null)" for the instruction opcode... then
I thought it was odd that the assertion didn't trigger. :)

> Reviewed-by: Kenneth Graunke 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/4] mesa: Don't expose GL_EXT_shader_integer_mix in GLES 1.x

2015-11-18 Thread Matt Turner

On Wed, Nov 18, 2015 at 11:50 AM, Jason Ekstrand  wrote:
> Reviwed-by: Jason Ekstrand 

idr: Please fix Jason's typo when you amend the patch.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/5] nir: add a vectorization pass

2015-11-18 Thread Jason Ekstrand

On Sat, Nov 14, 2015 at 6:59 PM, Connor Abbott  wrote:
> This effectively does the opposite of nir_lower_alus_to_scalar, trying
> to combine per-component ALU operations with the same sources but
> different swizzles into one larger ALU operation. It uses a similar
> model as CSE, where we do a depth-first approach and keep around a hash
> set of instructions to be combined, but there are a few major
> differences:
>
> 1. For now, we only support entirely per-component ALU operations.
> 2. Since it's not always guaranteed that we'll be able to combine
> equivalent instructions, we keep a stack of equivalent instructions
> around, trying to combine new instructions with instructions on the
> stack.
>
> The pass isn't comprehensive by far; it can't handle operations where
> some of the sources are per-component and others aren't, and it can't
> handle phi nodes. But it should handle the more common cases, and it
> should be reasonably efficient.
>
> Signed-off-by: Connor Abbott 
> ---
>  src/glsl/Makefile.sources|   1 +
>  src/glsl/nir/nir.h   |   2 +
>  src/glsl/nir/nir_opt_vectorize.c | 447 
> +++
>  3 files changed, 450 insertions(+)
>  create mode 100644 src/glsl/nir/nir_opt_vectorize.c
>
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index d4b02c1..7390975 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -70,6 +70,7 @@ NIR_FILES = \
> nir/nir_opt_peephole_select.c \
> nir/nir_opt_remove_phis.c \
> nir/nir_opt_undef.c \
> +   nir/nir_opt_vectorize.c \
> nir/nir_print.c \
> nir/nir_remove_dead_variables.c \
> nir/nir_search.c \
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index beabcaf..c1c04fd 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -2037,6 +2037,8 @@ bool nir_opt_remove_phis(nir_shader *shader);
>
>  bool nir_opt_undef(nir_shader *shader);
>
> +bool nir_opt_vectorize(nir_shader *shader);
> +
>  void nir_sweep(nir_shader *shader);
>
>  nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
> diff --git a/src/glsl/nir/nir_opt_vectorize.c 
> b/src/glsl/nir/nir_opt_vectorize.c
> new file mode 100644
> index 000..2a34a42
> --- /dev/null
> +++ b/src/glsl/nir/nir_opt_vectorize.c
> @@ -0,0 +1,447 @@
> +/*
> + * Copyright © 2015 Connor Abbott
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "nir.h"
> +#include "nir_vla.h"
> +#include "nir_builder.h"
> +#include  "nir_array.h"
> +
> +#define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data))
> +
> +static uint32_t
> +hash_src(uint32_t hash, const nir_src *src)
> +{
> +   assert(src->is_ssa);
> +
> +   return HASH(hash, src->ssa);
> +}
> +
> +static uint32_t
> +hash_alu_src(uint32_t hash, const nir_alu_src *src)
> +{
> +   assert(!src->abs && !src->negate);
> +
> +   /* intentionally don't hash swizzle */
> +
> +   return hash_src(hash, >src);
> +}
> +
> +static uint32_t
> +hash_alu(uint32_t hash, const nir_alu_instr *instr)
> +{
> +   hash = HASH(hash, instr->op);
> +
> +   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
> +  hash = hash_alu_src(hash, >src[i]);
> +
> +   return hash;
> +}
> +
> +static uint32_t
> +hash_instr(const nir_instr *instr)
> +{
> +   uint32_t hash = _mesa_fnv32_1a_offset_bias;
> +
> +   switch (instr->type) {
> +   case nir_instr_type_alu:
> +  return hash_alu(hash, nir_instr_as_alu(instr));
> +   default:
> +  unreachable("bad instruction type");
> +   }
> +}
> +
> +static bool
> +srcs_equal(const nir_src *src1, const nir_src *src2)
> +{
> +   assert(src1->is_ssa);
> +   assert(src2->is_ssa);
> +
> +   return src1->ssa == src2->ssa;
> +}
> +
> +static bool
> +alu_srcs_equal(const nir_alu_src *src1, const

[Mesa-dev] [PATCH] mesa/extensions: Document the extensions table

2015-11-18 Thread Nanley Chery

From: Nanley Chery 

Help developers understand the table's organization.

Suggested-by: Jason Ekstrand 
Signed-off-by: Nanley Chery 
---
 src/mesa/main/extensions_table.h | 17 +
 1 file changed, 17 insertions(+)

diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index d12fd9f..66c9a78 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -1,8 +1,25 @@
+/*
+ * This table contains information about extensions (see column descriptions). 
It is used to determine if an
+ * extension can be advertised to a client application.
+ *
+ * Using any of the following tokens in an API's column enables the extension 
for all versions of the API.
+ * Tokens representing the API are used instead of 'Y' to enable quick 
determination of the column's API.
+ */
 #define GLL 0
 #define GLC 0
 #define ES1 0
 #define ES2 0
+
+/*
+ * If a specific version is required within an API, it should be specified 
using a 2-digit number, e.g., 31, 33, 45.
+ * See EXT_shader_integer_mix for example.
+ *
+ * Using the following token in an API's column disables the extension for all 
versions of the API.
+ */
 #define  x ~0
+
+/* 
API ver required:
+ *  Name string , Driver enable flag   
  , GLL, GLC, ES1, ES2, Year */
 EXT(ARB_ES2_compatibility   , ARB_ES2_compatibility
  , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_ES3_compatibility   , ARB_ES3_compatibility
  , GLL, GLC,  x ,  x , 2012)
 EXT(ARB_arrays_of_arrays, ARB_arrays_of_arrays 
  , GLL, GLC,  x ,  x , 2012)
-- 
2.6.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa/extensions: Document the extensions table

2015-11-18 Thread Emil Velikov

Hi Nanley,

On 18 November 2015 at 20:29, Nanley Chery  wrote:
> From: Nanley Chery 
>
> Help developers understand the table's organization.
>
> Suggested-by: Jason Ekstrand 
> Signed-off-by: Nanley Chery 
Can you bring back the comments that were in here, please. They might
have caused issues with sed prior but things should be fine now. Also
move the existing FIXME as originally, as mentioned by Marek and
myself.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] nir: remove mem_ctx arg from nir_shader_create()

2015-11-18 Thread Rob Clark

From: Rob Clark 

Reference counting (which is introduced in a subsequent patch) basically
should only be done on root nodes in the ralloc tree.  In particular,
having multiple threads calling in to ralloc for the same graph will not
work.

Whereas reference-counting of shaders will be a useful thing, there does
not appear to be any use case for having a nir_shader itself parented
under some other parent mem_ctx.  So remove that option.

Signed-off-by: Rob Clark 
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c| 2 +-
 src/glsl/nir/glsl_to_nir.cpp   | 2 +-
 src/glsl/nir/nir.c | 5 ++---
 src/glsl/nir/nir.h | 5 ++---
 src/glsl/nir/nir_clone.c   | 4 ++--
 src/glsl/nir/tests/control_flow_tests.cpp  | 2 +-
 src/mesa/drivers/dri/i965/brw_nir.c| 2 +-
 src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp | 2 +-
 src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp | 2 +-
 src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp   | 2 +-
 src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp   | 2 +-
 src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp  | 2 +-
 src/mesa/program/prog_to_nir.c | 2 +-
 13 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c 
b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 0539cfc..dcc81af 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -1946,7 +1946,7 @@ tgsi_to_nir(const void *tgsi_tokens,
tgsi_scan_shader(tgsi_tokens, );
c->scan = 
 
-   s = nir_shader_create(NULL, tgsi_processor_to_shader_stage(scan.processor),
+   s = nir_shader_create(tgsi_processor_to_shader_stage(scan.processor),
  options);
 
nir_function *func = nir_function_create(s, "main");
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index e149d73..859a4ad 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -137,7 +137,7 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
 {
struct gl_shader *sh = shader_prog->_LinkedShaders[stage];
 
-   nir_shader *shader = nir_shader_create(NULL, stage, options);
+   nir_shader *shader = nir_shader_create(stage, options);
 
nir_visitor v1(shader);
nir_function_visitor v2();
diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 93c18fb..568017a 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -30,11 +30,10 @@
 #include 
 
 nir_shader *
-nir_shader_create(void *mem_ctx,
-  gl_shader_stage stage,
+nir_shader_create(gl_shader_stage stage,
   const nir_shader_compiler_options *options)
 {
-   nir_shader *shader = ralloc(mem_ctx, nir_shader);
+   nir_shader *shader = ralloc(NULL, nir_shader);
 
exec_list_make_empty(>uniforms);
exec_list_make_empty(>inputs);
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index e9d722e..e378f01 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1594,8 +1594,7 @@ typedef struct nir_shader {
   foreach_list_typed(nir_function_overload, overload, node, \
  &(func)->overload_list)
 
-nir_shader *nir_shader_create(void *mem_ctx,
-  gl_shader_stage stage,
+nir_shader *nir_shader_create(gl_shader_stage stage,
   const nir_shader_compiler_options *options);
 
 /** creates a register, including assigning it an index and adding it to the 
list */
@@ -1890,7 +1889,7 @@ void nir_index_blocks(nir_function_impl *impl);
 void nir_print_shader(nir_shader *shader, FILE *fp);
 void nir_print_instr(const nir_instr *instr, FILE *fp);
 
-nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s);
+nir_shader * nir_shader_clone(const nir_shader *s);
 
 #ifdef DEBUG
 void nir_validate_shader(nir_shader *shader);
diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c
index 33ff526..654cc0e 100644
--- a/src/glsl/nir/nir_clone.c
+++ b/src/glsl/nir/nir_clone.c
@@ -625,12 +625,12 @@ clone_function(clone_state *state, const nir_function 
*fxn, nir_shader *ns)
 }
 
 nir_shader *
-nir_shader_clone(void *mem_ctx, const nir_shader *s)
+nir_shader_clone(const nir_shader *s)
 {
clone_state state;
init_clone_state();
 
-   nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options);
+   nir_shader *ns = nir_shader_create(s->stage, s->options);
state.ns = ns;
 
clone_var_list(, >uniforms, >uniforms);
diff --git a/src/glsl/nir/tests/control_flow_tests.cpp 
b/src/glsl/nir/tests/control_flow_tests.cpp
index b9f90e6..dc7202a 100644
--- a/src/glsl/nir/tests/control_flow_tests.cpp
+++ b/src/glsl/nir/tests/control_flow_tests.cpp
@@ -37,7 +37,7 @@ protected:
 nir_cf_test::nir_cf_test()
 {
static const

[Mesa-dev] [PATCH 2/2] nir: add shader reference counting

2015-11-18 Thread Rob Clark

From: Rob Clark 

For gallium, at least, we'll need this to manage shader's lifetimes,
since in some cases both the driver and the state tracker will need
to hold on to a reference for variant managing.

Use nir_shader_mutable() before doing any IR opt/lowering/etc, to
ensure you are not modifying a copy someone else is also holding a
reference to.  In this way, unnecessary nir_shader_clone()s are
avoided whenever possible.

v2: make nir_shader_ref() return itself.. convenient for doing things
like 'nir_shader_mutable(nir_shader_ref(foo))'..

Signed-off-by: Rob Clark 
---
fwiw, patches using refcnt'ing at:
https://github.com/freedreno/mesa/commits/wip-nir-refcnt

 src/gallium/drivers/vc4/vc4_program.c |  2 +-
 src/glsl/nir/nir.c|  2 ++
 src/glsl/nir/nir.h| 41 +++
 src/mesa/program/program.c|  3 ++-
 4 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 52317bd..c9970a0 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1741,7 +1741,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
 c->num_uniforms);
 }
 
-ralloc_free(c->s);
+nir_shader_unref(c->s);
 
 return c;
 }
diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 568017a..219f5d9 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -35,6 +35,8 @@ nir_shader_create(gl_shader_stage stage,
 {
nir_shader *shader = ralloc(NULL, nir_shader);
 
+   p_atomic_set(>refcount, 1);
+
exec_list_make_empty(>uniforms);
exec_list_make_empty(>inputs);
exec_list_make_empty(>outputs);
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index e378f01..c2a46ea 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -34,6 +34,7 @@
 #include "util/ralloc.h"
 #include "util/set.h"
 #include "util/bitset.h"
+#include "util/u_atomic.h"
 #include "nir_types.h"
 #include "shader_enums.h"
 #include <stdio.h>
@@ -1546,6 +1547,8 @@ typedef struct nir_shader_info {
 } nir_shader_info;
 
 typedef struct nir_shader {
+   int refcount;
+
/** list of uniforms (nir_variable) */
struct exec_list uniforms;
 
@@ -1891,6 +1894,44 @@ void nir_print_instr(const nir_instr *instr, FILE *fp);
 
 nir_shader * nir_shader_clone(const nir_shader *s);
 
+static inline nir_shader *
+nir_shader_ref(nir_shader *shader)
+{
+   p_atomic_inc(&shader->refcount);
+   return shader;
+}
+
+static inline void
+nir_shader_unref(nir_shader *shader)
+{
+   if (p_atomic_dec_zero(&shader->refcount)) {
+  ralloc_free(shader);
+   }
+}
+
+/* A shader with only a single reference is mutable: */
+static inline bool
+nir_shader_is_mutable(nir_shader *shader)
+{
+   return p_atomic_read(&shader->refcount) == 1;
+}
+
+/* Convert a shader reference into a mutable shader reference.  Ie. if
+ * there is only a single reference to the shader, then return that,
+ * otherwise clone and drop reference to existing shader.
+ */
+static inline nir_shader *
+nir_shader_mutable(nir_shader *shader)
+{
+   if (nir_shader_is_mutable(shader)) {
+  return shader;
+   } else {
+  nir_shader *ns = nir_shader_clone(shader);
+  nir_shader_unref(shader);
+  return ns;
+   }
+}
+
 #ifdef DEBUG
 void nir_validate_shader(nir_shader *shader);
 void nir_metadata_set_validation_flag(nir_shader *shader);
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 0e78e6a..c2da66e 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -38,6 +38,7 @@
 #include "prog_parameter.h"
 #include "prog_instruction.h"
 #include "util/ralloc.h"
+#include "nir.h"
 
 
 /**
@@ -273,7 +274,7 @@ _mesa_delete_program(struct gl_context *ctx, struct 
gl_program *prog)
}
 
if (prog->nir) {
-  ralloc_free(prog->nir);
+  nir_shader_unref(prog->nir);
}
 
mtx_destroy(&prog->Mutex);
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 19/42] glsl ubo/ssbo: Move common code into lower_buffer_access::setup_buffer_access

2015-11-18 Thread Iago Toral

On Tue, 2015-11-17 at 21:54 -0800, Jordan Justen wrote:
> This code will also be usable by the pass to lower shared variables.
> 
> Note, that *const_offset is adjusted by setup_buffer_access so it must
> be initialized before calling setup_buffer_access.
> 
> v2:
>  * Add comment for lower_buffer_access::setup_buffer_access
> 
> Signed-off-by: Jordan Justen 
> Cc: Samuel Iglesias Gonsalvez 
> Cc: Iago Toral Quiroga 
> Reviewed-by: Iago Toral Quiroga 
> ---
>  src/glsl/lower_buffer_access.cpp | 177 
> +++
>  src/glsl/lower_buffer_access.h   |   5 ++
>  src/glsl/lower_ubo_reference.cpp | 160 +--
>  3 files changed, 185 insertions(+), 157 deletions(-)
> 
> diff --git a/src/glsl/lower_buffer_access.cpp 
> b/src/glsl/lower_buffer_access.cpp
> index b5fe6e3..297ed69 100644
> --- a/src/glsl/lower_buffer_access.cpp
> +++ b/src/glsl/lower_buffer_access.cpp
> @@ -305,4 +305,181 @@ 
> lower_buffer_access::is_dereferenced_thing_row_major(const ir_rvalue *deref)
> return false;
>  }
>  
> +/**
> + * This function initializes various values that will be used later by
> + * emit_access when actually emitting loads or stores.
> + *
> + * Note: const_offset is an input as well as an output. For UBO and SSBO, the
> + * caller should initialize it to 0 to point to the start of the buffer
> + * object. For compute shader shared variables it will be initialized to the
> + * offset of variable in the shared variable storage block.

I think this is not true: your version changes UBO and SSBO to behave
just like shared variables, since you no longer ever update *const_offset
when you find an ir_type_dereference_variable node. Instead, you always
initialize *const_offset to the value of ubo_var->Offset in
setup_for_load_or_store. I think you can just rewrite the note comment
above as:

"const_offset is an input as well as an output, clients must initialize
it to the offset of the variable in the underlying block, and this
function will adjust it by adding the constant offset of the member
being accessed into that variable"

> + */
> +void
> +lower_buffer_access::setup_buffer_access(void *mem_ctx,
> + ir_variable *var,
> + ir_rvalue *deref,
> + ir_rvalue **offset,
> + unsigned *const_offset,
> + bool *row_major,
> + int *matrix_columns,
> + unsigned packing)
> +{
> +   *offset = new(mem_ctx) ir_constant(0u);
> +   *row_major = is_dereferenced_thing_row_major(deref);
> +   *matrix_columns = 1;
> +
> +   /* Calculate the offset to the start of the region of the UBO
> +* dereferenced by *rvalue.  This may be a variable offset if an
> +* array dereference has a variable index.
> +*/
> +   while (deref) {
> +  switch (deref->ir_type) {
> +  case ir_type_dereference_variable: {
> + deref = NULL;
> + break;
> +  }
> +
> +  case ir_type_dereference_array: {
> + ir_dereference_array *deref_array = (ir_dereference_array *) deref;
> + unsigned array_stride;
> + if (deref_array->array->type->is_vector()) {
> +/* We get this when storing or loading a component out of a 
> vector
> + * with a non-constant index. This happens for v[i] = f where v 
> is
> + * a vector (or m[i][j] = f where m is a matrix). If we don't
> + * lower that here, it gets turned into v = vector_insert(v, i,
> + * f), which loads the entire vector, modifies one component and
> + * then write the entire thing back.  That breaks if another
> + * thread or SIMD channel is modifying the same vector.
> + */
> +array_stride = 4;
> +if (deref_array->array->type->is_double())
> +   array_stride *= 2;
> + } else if (deref_array->array->type->is_matrix() && *row_major) {
> +/* When loading a vector out of a row major matrix, the
> + * step between the columns (vectors) is the size of a
> + * float, while the step between the rows (elements of a
> + * vector) is handled below in emit_ubo_loads.
> + */
> +array_stride = 4;
> +if (deref_array->array->type->is_double())
> +   array_stride *= 2;
> +*matrix_columns = deref_array->array->type->matrix_columns;
> + } else if (deref_array->type->without_array()->is_interface()) {
> +/* We're processing an array dereference of an interface instance
> + * array. The thing being dereferenced *must* be a variable
> + * dereference because interfaces cannot be

Re: [Mesa-dev] [PATCH 1/2] dri_interface: Introduce __DRI_IMAGE_USE_SCANOUT_ROTATION_* flags (v2)

2015-11-18 Thread Michel Dänzer

On 07.11.2015 12:05, Vivek Kasireddy wrote:
> These flags can be used by the DRI driver to set additional requirements
> such as tiling while creating buffers.
> 
> v2: Added a brief comment to explain the rotation orientation.
> 
> Cc: Michel Danzer 
> Signed-off-by: Vivek Kasireddy 
> ---
>  include/GL/internal/dri_interface.h | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/include/GL/internal/dri_interface.h 
> b/include/GL/internal/dri_interface.h
> index 6bbd3fa..c72c365 100644
> --- a/include/GL/internal/dri_interface.h
> +++ b/include/GL/internal/dri_interface.h
> @@ -1101,6 +1101,15 @@ struct __DRIdri2ExtensionRec {
>  #define __DRI_IMAGE_USE_CURSOR   0x0004 /* Depricated */
>  #define __DRI_IMAGE_USE_LINEAR   0x0008
>  
> +/**
> + * Setting a rotation angle of 90 or 270 would result in the scanout
> + * buffer being rotated in a clounter clockwise manner. This is the
> + * expected behavior for ensuring XRandR compliance.
> + */
> +#define __DRI_IMAGE_USE_SCANOUT_ROTATION_90  0x0010
> +#define __DRI_IMAGE_USE_SCANOUT_ROTATION_180 0x0020
> +#define __DRI_IMAGE_USE_SCANOUT_ROTATION_270 0x0040

Please don't leave an empty line between the existing __DRI_IMAGE_USE
defines and the ones you're adding, to prevent others from accidentally
adding conflicting defines.


Also, this has the same "clounter" typo Dieter pointed out in patch 2.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] gbm: Add flags to enable creation of rotated scanout buffers (v4)

2015-11-18 Thread Michel Dänzer

On 07.11.2015 12:05, Vivek Kasireddy wrote:
> For certain platforms that support rotated scanout buffers, currently,
> there is no way to create them with the GBM DRI interface. These flags
> will instruct the DRI driver to create the buffer by setting
> additional requirements such as tiling mode.
> 
> v2: Reserve a bit per angle. (Ville and Michel)
> 
> v3:
> - Combine all GBM_BO_USE_SCANOUT_ROTATION_* flags into
>   GBM_BO_USE_SCANOUT_ANY macro (Michel)
> - Pull the code that updates dri_use based on the rotation flag
>   into a separate function.
> 
> v4:
> - Added a brief comment to explain the rotation orientation.
> - Augmented the helper function gbm_to_dri_flag() introduced in v3
>   to handle GBM_BO_USE_CURSOR and GBM_BO_USE_LINEAR as well. (Michel)

It would be cleaner to split out the refactoring of the handling of
existing flags into gbm_to_dri_flag() in a new patch 2, and only add the
new flags in a new patch 3.

Apart from that and the typo pointed out by Dieter, looks good to me.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 0/9] gallium: batch query objects and related cleanups

2015-11-18 Thread Nicolai Hähnle

Hi everybody,

this is the next iteration of the series, rebased on current master and
with the adjustment to nv50 queries. Please take a look! Samuel, let me know
when you've tested this!

Cheers,
Nicolai
---
 gallium/auxiliary/hud/hud_context.c   |   24 +-
 gallium/auxiliary/hud/hud_driver_query.c  |  271 +-
 gallium/auxiliary/hud/hud_private.h   |   13 +
 gallium/drivers/nouveau/nv50/nv50_query.c |1 
 gallium/drivers/nouveau/nvc0/nvc0_query.c |5 
 gallium/include/pipe/p_context.h  |   19 ++
 gallium/include/pipe/p_defines.h  |   36 ++-
 mesa/state_tracker/st_cb_perfmon.c|  258 
 mesa/state_tracker/st_cb_perfmon.h|   32 ++-
 mesa/state_tracker/st_context.h   |3 
 10 files changed, 486 insertions(+), 176 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 5/9] st/mesa: use BITSET_FOREACH_SET to loop through active perfmon counters

2015-11-18 Thread Nicolai Hähnle

Reviewed-by: Samuel Pitoiset 
---
 src/mesa/state_tracker/st_cb_perfmon.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 80ff170..ec12eb2 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -50,6 +50,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
   const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -60,14 +61,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
-  for (cid = 0; cid < g->NumCounters; cid++) {
- const struct gl_perf_monitor_counter *c = &g->Counters[cid];
+  BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
  const struct st_perf_monitor_counter *stc = &stg->counters[cid];
  struct st_perf_counter_object *cntr;
 
- if (!BITSET_TEST(m->ActiveCounters[gid], cid))
-continue;
-
  cntr = CALLOC_STRUCT(st_perf_counter_object);
  if (!cntr)
 return false;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 7/9] gallium: add the concept of batch queries

2015-11-18 Thread Nicolai Hähnle

Some drivers (in particular radeon[si], but also freedreno judging from
a quick grep) may want to expose performance counters that cannot be
individually enabled or disabled.

Allow such drivers to mark driver-specific queries as requiring a new
type of batch query object that is used to start and stop a list of queries
simultaneously.

v3: adjust recently added nv50 queries

v2: documentation for create_batch_query
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c |  1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  1 +
 src/gallium/include/pipe/p_context.h  | 19 +++
 src/gallium/include/pipe/p_defines.h  | 27 +--
 4 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 643d430..6b3e49a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -174,6 +174,7 @@ nv50_screen_get_driver_query_info(struct pipe_screen 
*pscreen,
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->group_id = -1;
+   info->flags = 0;
 
return nv50_hw_get_driver_query_info(screen, id, info);
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index 1f1270e..d992b10 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen 
*pscreen,
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->group_id = -1;
+   info->flags = 0;
 
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
if (id < num_sw_queries)
diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index 27f358f..be7447d 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -116,6 +116,25 @@ struct pipe_context {
unsigned query_type,
unsigned index );
 
+   /**
+* Create a query object that queries all given query types simultaneously.
+*
+* This can only be used for those query types for which
+* get_driver_query_info indicates that it must be used. Only one batch
+* query object may be active at a time.
+*
+* There may be additional constraints on which query types can be used
+* together, in particular those that are implied by
+* get_driver_query_group_info.
+*
+* \param num_queries the number of query types
+* \param query_types array of \p num_queries query types
+* \return a query object, or NULL on error.
+*/
+   struct pipe_query *(*create_batch_query)( struct pipe_context *pipe,
+ unsigned num_queries,
+ unsigned *query_types );
+
void (*destroy_query)(struct pipe_context *pipe,
  struct pipe_query *q);
 
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7ed9f6d..b3c8b9f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -776,6 +776,16 @@ struct pipe_query_data_pipeline_statistics
 };
 
 /**
+ * For batch queries.
+ */
+union pipe_numeric_type_union
+{
+   uint64_t u64;
+   uint32_t u32;
+   float f;
+};
+
+/**
  * Query result (returned by pipe_context::get_query_result).
  */
 union pipe_query_result
@@ -811,6 +821,9 @@ union pipe_query_result
 
/* PIPE_QUERY_PIPELINE_STATISTICS */
struct pipe_query_data_pipeline_statistics pipeline_statistics;
+
+   /* batch queries */
+   union pipe_numeric_type_union batch[0];
 };
 
 union pipe_color_union
@@ -840,12 +853,13 @@ enum pipe_driver_query_result_type
PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE = 1,
 };
 
-union pipe_numeric_type_union
-{
-   uint64_t u64;
-   uint32_t u32;
-   float f;
-};
+/**
+ * Some hardware requires some hardware-specific queries to be submitted
+ * as batched queries. The corresponding query objects are created using
+ * create_batch_query, and at most one such query may be active at
+ * any time.
+ */
+#define PIPE_DRIVER_QUERY_FLAG_BATCH (1 << 0)
 
 struct pipe_driver_query_info
 {
@@ -855,6 +869,7 @@ struct pipe_driver_query_info
enum pipe_driver_query_type type;
enum pipe_driver_query_result_type result_type;
unsigned group_id;
+   unsigned flags;
 };
 
 struct pipe_driver_query_group_info
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 8/9] gallium/hud: add support for batch queries

2015-11-18 Thread Nicolai Hähnle

v2 + v3: be more defensive about allocations
---
 src/gallium/auxiliary/hud/hud_context.c  |  24 ++-
 src/gallium/auxiliary/hud/hud_driver_query.c | 270 +++
 src/gallium/auxiliary/hud/hud_private.h  |  13 +-
 3 files changed, 261 insertions(+), 46 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index ffe30b8..bcef701 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -57,6 +57,7 @@ struct hud_context {
struct cso_context *cso;
struct u_upload_mgr *uploader;
 
+   struct hud_batch_query_context *batch_query;
struct list_head pane_list;
 
/* states */
@@ -510,6 +511,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
hud_alloc_vertices(hud, &hud->text, 4 * 512, 4 * sizeof(float));
 
/* prepare all graphs */
+   hud_batch_query_update(hud->batch_query);
+
LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) {
   LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) {
  gr->query_new_value(gr);
@@ -903,17 +906,21 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
   }
   else if (strcmp(name, "samples-passed") == 0 &&
has_occlusion_query(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "samples-passed",
+ hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+"samples-passed",
 PIPE_QUERY_OCCLUSION_COUNTER, 0, 0,
 PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
   }
   else if (strcmp(name, "primitives-generated") == 0 &&
has_streamout(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "primitives-generated",
+ hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+"primitives-generated",
 PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0,
 PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
   }
   else {
  boolean processed = FALSE;
@@ -938,17 +945,19 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
if (strcmp(name, pipeline_statistics_names[i]) == 0)
   break;
 if (i < Elements(pipeline_statistics_names)) {
-   hud_pipe_query_install(pane, hud->pipe, name,
+   hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, name,
   PIPE_QUERY_PIPELINE_STATISTICS, i,
   0, PIPE_DRIVER_QUERY_TYPE_UINT64,
-  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+  0);
processed = TRUE;
 }
  }
 
  /* driver queries */
  if (!processed) {
-if (!hud_driver_query_install(pane, hud->pipe, name)){
+if (!hud_driver_query_install(&hud->batch_query, pane, hud->pipe,
+  name)) {
fprintf(stderr, "gallium_hud: unknown driver query '%s'\n", 
name);
 }
  }
@@ -1287,6 +1296,7 @@ hud_destroy(struct hud_context *hud)
   FREE(pane);
}
 
+   hud_batch_query_cleanup(&hud->batch_query);
pipe->delete_fs_state(pipe, hud->fs_color);
pipe->delete_fs_state(pipe, hud->fs_text);
pipe->delete_vs_state(pipe, hud->vs);
diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index 3198ab3..d7b1f11 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -34,13 +34,164 @@
 #include "hud/hud_private.h"
 #include "pipe/p_screen.h"
 #include "os/os_time.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include <stdio.h>
 
+// Must be a power of two
 #define NUM_QUERIES 8
 
+struct hud_batch_query_context {
+   struct pipe_context *pipe;
+   unsigned num_query_types;
+   unsigned allocated_query_types;
+   unsigned *query_types;
+
+   boolean failed;
+   struct pipe_query *query[NUM_QUERIES];
+   union pipe_query_result *result[NUM_QUERIES];
+   unsigned head, pending, results;
+};
+
+void
+hud_batch_query_update(struct hud_batch_query_context *bq)
+{
+   struct pipe_context *pipe;
+
+   if (!bq || bq->failed)
+  return;
+
+   pipe = bq->pipe;
+
+   if (bq->query[bq->head])
+  pipe->end_query(pipe, bq->query[bq->head]);
+
+   bq->results = 0;
+
+   while (bq->pending) {
+  unsigned idx =

[Mesa-dev] [PATCH v3 9/9] st/mesa: add support for batch driver queries to perfmon

2015-11-18 Thread Nicolai Hähnle

v2 + v3: forgot null-pointer checks (spotted by Samuel Pitoiset)
---
 src/mesa/state_tracker/st_cb_perfmon.c | 83 +++---
 src/mesa/state_tracker/st_cb_perfmon.h |  6 +++
 2 files changed, 82 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 8628e23..8fdf0e8 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,7 +42,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned *batch = NULL;
unsigned num_active_counters = 0;
+   unsigned max_batch_counters = 0;
+   unsigned num_batch_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
@@ -50,6 +53,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,6 +65,8 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
   }
 
   num_active_counters += m->ActiveGroups[gid];
+  if (stg->has_batch)
+ max_batch_counters += m->ActiveGroups[gid];
}
 
if (!num_active_counters)
@@ -71,6 +77,12 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
if (!stm->active_counters)
   return false;
 
+   if (max_batch_counters) {
+  batch = CALLOC(max_batch_counters, sizeof(*batch));
+  if (!batch)
+ return false;
+   }
+
/* Create a query for each active counter. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
@@ -82,13 +94,35 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  struct st_perf_counter_object *cntr =
 &stm->active_counters[stm->num_active_counters];
 
- cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
+ if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
+cntr->batch_index = num_batch_counters;
+batch[num_batch_counters++] = stc->query_type;
+ } else {
+cntr->query = pipe->create_query(pipe, stc->query_type, 0);
+if (!cntr->query)
+   goto fail;
+ }
  ++stm->num_active_counters;
   }
}
+
+   /* Create the batch query. */
+   if (num_batch_counters) {
+  stm->batch_query = pipe->create_batch_query(pipe, num_batch_counters,
+  batch);
+  stm->batch_result = CALLOC(num_batch_counters, 
sizeof(stm->batch_result->batch[0]));
+  if (!stm->batch_query || !stm->batch_result)
+ goto fail;
+   }
+
+   FREE(batch);
return true;
+
+fail:
+   FREE(batch);
+   return false;
 }
 
 static void
@@ -105,6 +139,13 @@ reset_perf_monitor(struct st_perf_monitor_object *stm,
FREE(stm->active_counters);
stm->active_counters = NULL;
stm->num_active_counters = 0;
+
+   if (stm->batch_query) {
+  pipe->destroy_query(pipe, stm->batch_query);
+  stm->batch_query = NULL;
+   }
+   FREE(stm->batch_result);
+   stm->batch_result = NULL;
 }
 
 static struct gl_perf_monitor_object *
@@ -143,9 +184,13 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Start the query for each active counter. */
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
-  if (!pipe->begin_query(pipe, query))
+  if (query && !pipe->begin_query(pipe, query))
   goto fail;
}
+
+   if (stm->batch_query && !pipe->begin_query(pipe, stm->batch_query))
+  goto fail;
+
return true;
 
 fail:
@@ -164,8 +209,12 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Stop the query for each active counter. */
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
-  pipe->end_query(pipe, query);
+  if (query)
+ pipe->end_query(pipe, query);
}
+
+   if (stm->batch_query)
+  pipe->end_query(pipe, stm->batch_query);
 }
 
 static void
@@ -199,11 +248,16 @@ st_IsPerfMonitorResultAvailable(struct gl_context *ctx,
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
   union pipe_query_result result;
-  if (!pipe->get_query_result(pipe, query,

[Mesa-dev] [PATCH v3 2/9] gallium/hud: remove unused field in query_info

2015-11-18 Thread Nicolai Hähnle

Reviewed-by: Samuel Pitoiset 
---
 src/gallium/auxiliary/hud/hud_driver_query.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index f14305e..3198ab3 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -48,7 +48,6 @@ struct query_info {
/* Ring of queries. If a query is busy, we use another slot. */
struct pipe_query *query[NUM_QUERIES];
unsigned head, tail;
-   unsigned num_queries;
 
uint64_t last_time;
uint64_t results_cumulative;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v3 4/9] st/mesa: store mapping from perfmon counter to query type

2015-11-18 Thread Nicolai Hähnle

Previously, when a performance monitor was initialized, an inner loop through
all driver queries with string comparisons for each enabled performance
monitor counter was used. This hurts when a driver exposes lots of queries.

Reviewed-by: Samuel Pitoiset 
---
 src/mesa/state_tracker/st_cb_perfmon.c | 74 +++---
 src/mesa/state_tracker/st_cb_perfmon.h | 14 +++
 src/mesa/state_tracker/st_context.h|  3 ++
 3 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index dedb8f5..80ff170 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -36,48 +36,20 @@
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
 
-/**
- * Return a PIPE_QUERY_x type >= PIPE_QUERY_DRIVER_SPECIFIC, or -1 if
- * the driver-specific query doesn't exist.
- */
-static int
-find_query_type(struct pipe_screen *screen, const char *name)
-{
-   int num_queries;
-   int type = -1;
-   int i;
-
-   num_queries = screen->get_driver_query_info(screen, 0, NULL);
-   if (!num_queries)
-  return type;
-
-   for (i = 0; i < num_queries; i++) {
-  struct pipe_driver_query_info info;
-
-  if (!screen->get_driver_query_info(screen, i, &info))
- continue;
-
-  if (!strncmp(info.name, name, strlen(name))) {
- type = info.query_type;
- break;
-  }
-   }
-   return type;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
+   struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
-   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
-   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_context *pipe = st->pipe;
int gid, cid;
 
-   st_flush_bitmap_cache(st_context(ctx));
+   st_flush_bitmap_cache(st);
 
/* Create a query for each active counter. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -90,20 +62,17 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 
   for (cid = 0; cid < g->NumCounters; cid++) {
  const struct gl_perf_monitor_counter *c = &g->Counters[cid];
+ const struct st_perf_monitor_counter *stc = &stg->counters[cid];
  struct st_perf_counter_object *cntr;
- int query_type;
 
  if (!BITSET_TEST(m->ActiveCounters[gid], cid))
 continue;
 
- query_type = find_query_type(screen, c->Name);
- assert(query_type != -1);
-
  cntr = CALLOC_STRUCT(st_perf_counter_object);
  if (!cntr)
 return false;
 
- cntr->query= pipe->create_query(pipe, query_type, 0);
+ cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
 
@@ -286,6 +255,7 @@ st_init_perfmon(struct st_context *st)
struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor;
struct pipe_screen *screen = st->pipe->screen;
struct gl_perf_monitor_group *groups = NULL;
+   struct st_perf_monitor_group *stgroups = NULL;
int num_counters, num_groups;
int gid, cid;
 
@@ -304,26 +274,36 @@ st_init_perfmon(struct st_context *st)
if (!groups)
   return false;
 
+   stgroups = CALLOC(num_groups, sizeof(*stgroups));
+   if (!stgroups)
+  goto fail_only_groups;
+
for (gid = 0; gid < num_groups; gid++) {
   struct gl_perf_monitor_group *g = &groups[perfmon->NumGroups];
   struct pipe_driver_query_group_info group_info;
   struct gl_perf_monitor_counter *counters = NULL;
+  struct st_perf_monitor_counter *stcounters = NULL;
 
   if (!screen->get_driver_query_group_info(screen, gid, &group_info))
  continue;
 
   g->Name = group_info.name;
   g->MaxActiveCounters = group_info.max_active_queries;
-  g->NumCounters = 0;
-  g->Counters = NULL;
 
   if (group_info.num_queries)
  counters = CALLOC(group_info.num_queries, sizeof(*counters));
   if (!counters)
  goto fail;
+  g->Counters = counters;
+
+  stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters));
+  if (!stcounters)
+ goto fail;
+  stgroups[perfmon->NumGroups].counters = stcounters;
 
   for (cid = 0; cid < num_counters; cid++) {
  struct gl_perf_monitor_counter *c = &counters[g->NumCounters];
+ struct st_perf_monitor_counter *stc = &stcounters[g->NumCounters];
  struct pipe_driver_query_info info;
 
  if (!screen->get_driver_query_info(screen, cid, &info))
@@ -359,18 +339,25 @@ st_init_perfmon(struct st_context *st)
 default:

[Mesa-dev] [PATCH v3 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-18 Thread Nicolai Hähnle

This was only used to implement an unnecessarily restrictive interpretation
of the spec of AMD_performance_monitor. The spec says

  A performance monitor consists of a number of hardware and software
  counters that can be sampled by the GPU and reported back to the
  application.

I guess one could take this as a requirement that counters _must_ be sampled
by the GPU, but then why are they called _software_ counters? Besides,
there's not much reason _not_ to expose all counters that are available,
and this simplifies the code.

v3: add a missing change in the nouveau driver (thanks Samuel Pitoiset)
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  4 
 src/gallium/include/pipe/p_defines.h  |  7 ---
 src/mesa/state_tracker/st_cb_perfmon.c| 30 ---
 3 files changed, 41 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index edde57e..1f1270e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -200,7 +200,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
if (id == NVC0_HW_SM_QUERY_GROUP) {
   if (screen->compute) {
  info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 
  /* Because we can't expose the number of hardware counters needed for
   * each different query, we don't want to allow more than one active
@@ -224,7 +223,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
   if (screen->compute) {
  if (screen->base.class_3d < NVE4_3D_CLASS) {
 info->name = "Performance metrics";
-info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 info->max_active_queries = 1;
 info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
 return 1;
@@ -234,7 +232,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
   info->name = "Driver statistics";
-  info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
   info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
   info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
   return 1;
@@ -245,7 +242,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
info->name = "this_is_not_the_query_group_you_are_looking_for";
info->max_active_queries = 0;
info->num_queries = 0;
-   info->type = 0;
return 0;
 }
 
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7240154..7f241c8 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -829,12 +829,6 @@ enum pipe_driver_query_type
PIPE_DRIVER_QUERY_TYPE_HZ   = 6,
 };
 
-enum pipe_driver_query_group_type
-{
-   PIPE_DRIVER_QUERY_GROUP_TYPE_CPU = 0,
-   PIPE_DRIVER_QUERY_GROUP_TYPE_GPU = 1,
-};
-
 /* Whether an average value per frame or a cumulative value should be
  * displayed.
  */
@@ -864,7 +858,6 @@ struct pipe_driver_query_info
 struct pipe_driver_query_group_info
 {
const char *name;
-   enum pipe_driver_query_group_type type;
unsigned max_active_queries;
unsigned num_queries;
 };
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 1bb5be3..4ec6d86 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -65,27 +65,6 @@ find_query_type(struct pipe_screen *screen, const char *name)
return type;
 }
 
-/**
- * Return TRUE if the underlying driver expose GPU counters.
- */
-static bool
-has_gpu_counters(struct pipe_screen *screen)
-{
-   int num_groups, gid;
-
-   num_groups = screen->get_driver_query_group_info(screen, 0, NULL);
-   for (gid = 0; gid < num_groups; gid++) {
-  struct pipe_driver_query_group_info group_info;
-
-  if (!screen->get_driver_query_group_info(screen, gid, _info))
- continue;
-
-  if (group_info.type == PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- return true;
-   }
-   return false;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
@@ -313,12 +292,6 @@ st_init_perfmon(struct st_context *st)
if (!screen->get_driver_query_info || !screen->get_driver_query_group_info)
   return false;
 
-   if (!has_gpu_counters(screen)) {
-  /* According to the spec, GL_AMD_performance_monitor must only
-   * expose GPU counters. */
-  return false;
-   }
-
/* Get the number of available queries. */
num_counters = screen->get_driver_query_info(screen, 0, NULL);
if (!num_counters)
@@ -339,9 +312,6 @@ st_init_perfmon(struct st_context *st)
   if (!screen->get_driver_query_group_info(screen, gid, _info))
  continue;
 
-  if (group_info.type != PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- continue;
-

[Mesa-dev] [PATCH v3 3/9] st/mesa: map semantic driver query types to underlying type

2015-11-18 Thread Nicolai Hähnle

Reviewed-by: Samuel Pitoiset 
---
 src/gallium/include/pipe/p_defines.h   | 2 ++
 src/mesa/state_tracker/st_cb_perfmon.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7f241c8..7ed9f6d 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -791,6 +791,8 @@ union pipe_query_result
/* PIPE_QUERY_PRIMITIVES_GENERATED */
/* PIPE_QUERY_PRIMITIVES_EMITTED */
/* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+   /* PIPE_DRIVER_QUERY_TYPE_BYTES */
+   /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */
/* PIPE_DRIVER_QUERY_TYPE_HZ */
uint64_t u64;
 
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 4ec6d86..dedb8f5 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -334,6 +334,9 @@ st_init_perfmon(struct st_context *st)
  c->Name = info.name;
  switch (info.type) {
 case PIPE_DRIVER_QUERY_TYPE_UINT64:
+case PIPE_DRIVER_QUERY_TYPE_BYTES:
+case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+case PIPE_DRIVER_QUERY_TYPE_HZ:
c->Minimum.u64 = 0;
c->Maximum.u64 = info.max_value.u64 ? info.max_value.u64 : -1;
c->Type = GL_UNSIGNED_INT64_AMD;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 70264] EGL pkg-config does not provide correct cflags

2015-11-18 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=70264

Pekka Paalanen  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |NOTABUG

--- Comment #2 from Pekka Paalanen  ---
For lack of a response, I am closing this as not a bug, because I think stuff
works as intended.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 92987] fails to determine screen refresh rate when rendering is offloaded

2015-11-18 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=92987

--- Comment #2 from Oliver Neukum  ---
(In reply to Stefan Dirsch from comment #1)
> What's wrong with limiting fps to the refresh rate of 60Hz? vblank_mode in
> /etc/drirc (~/.drirc) controls this. It's enabled (=1) by default.

Nothing is wrong with that. It is wrong not to do it. The error case is
rendering with Radeon. I suppose Mesa fails to understand that the rendering
device needs to take the refresh rate of an alien crtc.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 92987] fails to determine screen refresh rate when rendering is offloaded

2015-11-18 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=92987

Oliver Neukum  changed:

   What|Removed |Added

 Status|NEEDINFO|NEW

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 05/36] i965: Import tables enumerating the set of validated L3 configurations.

2015-11-18 Thread Francisco Jerez

Ben Widawsky  writes:

> On Sat, Nov 14, 2015 at 01:43:41PM -0800, Jordan Justen wrote:
>> From: Francisco Jerez 
>> 
>> It should be possible to use additional L3 configurations other than
>> the ones listed in the tables of validated allocations ("BSpec »
>> 3D-Media-GPGPU Engine » L3 Cache and URB [IVB+] » L3 Cache and URB [*]
>> » L3 Allocation and Programming"), but it seems sensible for now to
>> hard-code the tables in order to stick to the hardware docs.  Instead
>> of setting up the arbitrary L3 partitioning given as input, the
>> closest validated L3 configuration will be looked up in these tables
>> and used to program the hardware.
>> 
>> The included tables should work for Gen7-9.  Note that the quantities
>> are specified in ways rather than in KB, this is because the L3
>> control registers expect the value in ways, and because by doing that
>> we can re-use a single table for all GT variants of the same
>> generation (and in the case of IVB/HSW and CHV/SKL across different
>> generations) which generally have different L3 way sizes but allow the
>> same combinations of way allocations.
>> ---
>>  src/mesa/drivers/dri/i965/Makefile.sources |   1 +
>>  src/mesa/drivers/dri/i965/gen7_l3_state.c  | 163 
>> +
>>  2 files changed, 164 insertions(+)
>>  create mode 100644 src/mesa/drivers/dri/i965/gen7_l3_state.c
>> 
>> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
>> b/src/mesa/drivers/dri/i965/Makefile.sources
>> index 5a88d66..91901ad 100644
>> --- a/src/mesa/drivers/dri/i965/Makefile.sources
>> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
>> @@ -184,6 +184,7 @@ i965_FILES = \
>>  gen7_cs_state.c \
>>  gen7_disable.c \
>>  gen7_gs_state.c \
>> +gen7_l3_state.c \
>>  gen7_misc_state.c \
>>  gen7_sf_state.c \
>>  gen7_sol_state.c \
>> diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c 
>> b/src/mesa/drivers/dri/i965/gen7_l3_state.c
>> new file mode 100644
>> index 000..8f9ba5b
>> --- /dev/null
>> +++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
>> @@ -0,0 +1,163 @@
>> +/*
>> + * Copyright (c) 2015 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
>> OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
>> DEALINGS
>> + * IN THE SOFTWARE.
>> + */
>> +
>> +#include "brw_context.h"
>> +#include "brw_defines.h"
>> +#include "brw_state.h"
>> +#include "intel_batchbuffer.h"
>> +
>> +/**
>> + * Chunk of L3 cache reserved for some specific purpose.
>> + */
>> +enum brw_l3_partition {
>> +   /** Shared local memory. */
>> +   L3P_SLM = 0,
>> +   /** Unified return buffer. */
>> +   L3P_URB,
>> +   /** Union of DC and RO. */
>> +   L3P_ALL,
>> +   /** Data cluster RW partition. */
>> +   L3P_DC,
>> +   /** Union of IS, C and T. */
>> +   L3P_RO,
>> +   /** Instruction and state cache. */
>> +   L3P_IS,
>> +   /** Constant cache. */
>> +   L3P_C,
>> +   /** Texture cache. */
>> +   L3P_T,
>> +   /** Number of supported L3 partitions. */
>> +   NUM_L3P
>> +};
>> +
>> +/**
>> + * L3 configuration represented as the number of ways allocated for each
>> + * partition.  \sa get_l3_way_size().
>> + */
>> +struct brw_l3_config {
>> +   unsigned n[NUM_L3P];
>> +};
>> +
>> +/**
>> + * IVB/HSW validated L3 configurations.
>> + */
>> +static const struct brw_l3_config ivb_l3_configs[] = {
>> +   {{  0, 32,  0,  0, 32,  0,  0,  0 }},
>> +   {{  0, 32,  0, 16, 16,  0,  0,  0 }},
>> +   {{  0, 32,  0,  4,  0,  8,  4, 16 }},
>> +   {{  0, 28,  0,  8,  0,  8,  4, 16 }},
>> +   {{  0, 28,  0, 16,  0,  8,  4,  8 }},
>> +   {{  0, 28,  0,  8,  0, 16,  4,  8 }},
>> +   {{  0, 28,  0,  0,  0, 16,  4, 16 }},
>> +   {{  0, 32,  0,  0,  0, 16,  0, 16 }},
>> +   {{  0, 28,  0,  4, 32,  0,  0,  0 }},
>> +   {{ 16, 16,  0, 16, 16,  0,  0,  0 }},
>> +   {{ 16, 16,  0,  8,  0,  8,  8,  8 }},
>> +   {{ 16, 16,  0,  4,  0,  8,  4, 16 }},
>> +   {{ 16, 16,  0,  4,

[Mesa-dev] [PATCH v3 6/9] st/mesa: maintain active perfmon counters in an array

2015-11-18 Thread Nicolai Hähnle

It is easy enough to pre-determine the required size, and arrays are
generally better behaved especially when they get large.

v2: make sure init_perf_monitor returns true when no counters are active
(spotted by Samuel Pitoiset)

Reviewed-by: Samuel Pitoiset 
---
 src/mesa/state_tracker/st_cb_perfmon.c | 81 --
 src/mesa/state_tracker/st_cb_perfmon.h | 18 
 2 files changed, 58 insertions(+), 41 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index ec12eb2..8628e23 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,15 +42,14 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned num_active_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
 
-   /* Create a query for each active counter. */
+   /* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
-  const struct st_perf_monitor_group *stg = >perfmon[gid];
-  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,19 +60,32 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
+  num_active_counters += m->ActiveGroups[gid];
+   }
+
+   if (!num_active_counters)
+  return true;
+
+   stm->active_counters = CALLOC(num_active_counters,
+ sizeof(*stm->active_counters));
+   if (!stm->active_counters)
+  return false;
+
+   /* Create a query for each active counter. */
+   for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
+  const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = >perfmon[gid];
+  BITSET_WORD tmp;
+
   BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
  const struct st_perf_monitor_counter *stc = >counters[cid];
- struct st_perf_counter_object *cntr;
-
- cntr = CALLOC_STRUCT(st_perf_counter_object);
- if (!cntr)
-return false;
+ struct st_perf_counter_object *cntr =
+>active_counters[stm->num_active_counters];
 
  cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
-
- list_addtail(>list, >active_counters);
+ ++stm->num_active_counters;
   }
}
return true;
@@ -83,24 +95,24 @@ static void
 reset_perf_monitor(struct st_perf_monitor_object *stm,
struct pipe_context *pipe)
 {
-   struct st_perf_counter_object *cntr, *tmp;
+   unsigned i;
 
-   LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, >active_counters, list) {
-  if (cntr->query)
- pipe->destroy_query(pipe, cntr->query);
-  list_del(>list);
-  free(cntr);
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (query)
+ pipe->destroy_query(pipe, query);
}
+   FREE(stm->active_counters);
+   stm->active_counters = NULL;
+   stm->num_active_counters = 0;
 }
 
 static struct gl_perf_monitor_object *
 st_NewPerfMonitor(struct gl_context *ctx)
 {
struct st_perf_monitor_object *stq = 
ST_CALLOC_STRUCT(st_perf_monitor_object);
-   if (stq) {
-  list_inithead(>active_counters);
+   if (stq)
   return >base;
-   }
return NULL;
 }
 
@@ -119,9 +131,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
-   if (LIST_IS_EMPTY(>active_counters)) {
+   if (!stm->num_active_counters) {
   /* Create a query for each active counter before starting
* a new monitoring session. */
   if (!init_perf_monitor(ctx, m))
@@ -129,8 +141,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
}
 
/* Start the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, >active_counters, list) {
-  if (!pipe->begin_query(pipe, cntr->query))
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (!pipe->begin_query(pipe, query))
   goto fail;
}
return true;
@@ -146,11 +159,13 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe =

[Mesa-dev] [Bug 92987] fails to determine screen refresh rate when rendering is offloaded

2015-11-18 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=92987

Stefan Dirsch  changed:

   What|Removed |Added

 Status|NEW |NEEDINFO

--- Comment #1 from Stefan Dirsch  ---
What's wrong with limiting fps to the refresh rate of 60Hz? vblank_mode in
/etc/drirc (~/.drirc) controls this. It's enabled (=1) by default.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 03/42] i965: Adjust gen check in can_do_pipelined_register_writes

2015-11-18 Thread Francisco Jerez

Jordan Justen  writes:

> From: Francisco Jerez 
>
> Allow for pipelined register writes for gen < 7.
>
> v2:
>  * Split from another patch and adjust comment (jljusten)
>
> Reviewed-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/intel_extensions.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
> b/src/mesa/drivers/dri/i965/intel_extensions.c
> index f70f403..81215db 100644
> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
> @@ -40,8 +40,11 @@
>  static bool
>  can_do_pipelined_register_writes(struct brw_context *brw)
>  {
> -   /* Supposedly, Broadwell just works. */
> -   if (brw->gen >= 8)
> +   /**
> +* gen >= 8 specifically allows these writes. gen <= 6 also
> +* doesn't block them.
> +*/

Not only do they not block them, they don't block *anything* AFAIK...

> +   if (brw->gen != 7)
>return true;
>  
> static int result = -1;
> -- 
> 2.6.2


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 92987] fails to determine screen refresh rate when rendering is offloaded

2015-11-18 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=92987

Bug ID: 92987
   Summary: fails to determine screen refresh rate when rendering
is offloaded
   Product: Mesa
   Version: unspecified
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Severity: minor
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: oli...@neukum.org
QA Contact: mesa-dev@lists.freedesktop.org

This is seen with glxgears and rendering offloaded to Radeon with Intel Haswell
gfx. It is independent of the display used.

Glxgears renders with maximum rate if Radeon is used to render (DRI_PRIME=1)
but limits itself to the refresh rate of 60Hz if Intel is used for rendering
(DRI_PRIME=0)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] [v2] i965: Add lossless compression to surface format table

2015-11-18 Thread Pohjolainen, Topi

On Tue, Nov 17, 2015 at 05:30:06PM -0800, Ben Widawsky wrote:
> Background: Prior to Skylake and since Ivybridge Intel hardware has had the
> ability to use a MCS (Multisample Control Surface) as auxiliary data in
> "compression" operations on the surface. This reduces memory bandwidth.  This
> hardware was either used for MSAA compression, and fast clear operations.  On

This says:

... either ... , and ... 

should it have been

... either ... or ... ?


All in all, I really appreciate the thorough explanation here in this commit,
just had to check. I know I'm late with my comments, so bear with me.

> Gen8, a similar mechanism exists to allow the hiz buffer to be sampled from, 
> and
> therefore this feature is sometimes referred to more generally as "AUX 
> buffers".
> 
> Skylake adds the ability to have the display engine directly source compressed
> surfaces on top of the ability to sample from them. Inference dictates that
> enabling this display features adding a restriction to the formats which could

s/adding/adds/ ?

> actually be compressed. The current set of surfaces seems to be a subset as
> compared to previous gens (see the next patch). Also, if I had to guess I 
> would
> guess that future gens add support for more surface formats. To make handling
> this a bit easier to read, and more future proof, the support for this is 
> moved
> into the surface formats table.
> 
> Along with the modifications to the table, a helper function is also provided 
> to
> determine if a surface is CCS compatible.  Because fast clears are currently
> disabled on SKL, we can plumb the helper all the way through here, and not
> actually have anything break.
> 
> The logic in the table works a bit differently than the other columns in the
> table and therefore deserves a small mention. For most other features, the GEN

I have difficulty here also: the sentence compares the table to other columns in
the table ("... logic in the table ... than other columns...").

Did you mean to say that a particular _column_ in the table behaves
differently than the others?

> which began implementing it is set, and it is assumed future gens also support
> this. For this feature, GEN9 actually eliminates support for certain formats. 
> We
> could use this column to determine support for the similar feature on older

And here you refer to the newly added column? Comparing the contents of that
column to the supported render targets (column RT) gives you the delta between
gen9 and older?

> generation hardware. Aside from that being an error prone task which is
> unrelated to enabling this on GEN9, it becomes somewhat tricky to implement
> because of the fact that surface format support diminishes. You'd probably 
> want
> another column to cleanly implement it.

And this is what you did, right?

Again, sorry for the 20 questions.

Otherwise the patch makes sense to me:

Reviewed-by: Topi Pohjolainen 

(You probably want Chad and Neil to give their consent also).

> 
> v2:
> - rename ccs to ccs_e; Requested-by: Chad
> - rename lossless_compression to lossless_compression Requested-by: Chad
> - change meaning of brw_losslessly_compressible_format Requested-by: Chad
>   - related changes to the code to reflect this.
> - remove excess ccs (Chad)
> 
> Requested-by: Chad Versace 
> Requested-by: Neil Roberts 
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h |   2 +
>  src/mesa/drivers/dri/i965/brw_surface_formats.c | 525 
> +---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c   |   7 +-
>  3 files changed, 282 insertions(+), 252 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 8d6bc19..fe45edb 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1467,6 +1467,8 @@ void brw_upload_image_surfaces(struct brw_context *brw,
>  /* brw_surface_formats.c */
>  bool brw_render_target_supported(struct brw_context *brw,
>   struct gl_renderbuffer *rb);
> +bool brw_losslessly_compressible_format(struct brw_context *brw,
> +uint32_t brw_format);
>  uint32_t brw_depth_format(struct brw_context *brw, mesa_format format);
>  mesa_format brw_lower_mesa_image_format(const struct brw_device_info 
> *devinfo,
>  mesa_format format);
> diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c 
> b/src/mesa/drivers/dri/i965/brw_surface_formats.c
> index 97fff60..16f7fec 100644
> --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
> +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
> @@ -39,14 +39,15 @@ struct surface_format_info {
> int input_vb;
> int streamed_output_vb;
> int color_processing;
> +   int lossless_compression;
>

Re: [Mesa-dev] [PATCH v2 33/42] glsl: Check for SSBO variable in SSBO atomic lowering

2015-11-18 Thread Iago Toral

On Tue, 2015-11-17 at 21:55 -0800, Jordan Justen wrote:
> When an atomic function is called, we need to check to see if it is
> for an SSBO variable before lowering it to the SSBO specific intrinsic
> function.
> 
> v2:
>  * is_in_buffer_block => is_in_shader_storage_block (Iago)
> 
> Signed-off-by: Jordan Justen 
> Cc: Samuel Iglesias Gonsalvez 
> Cc: Iago Toral Quiroga 
> Reviewed-by: Iago Toral Quiroga 
> ---
>  src/glsl/lower_ubo_reference.cpp | 14 ++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/src/glsl/lower_ubo_reference.cpp 
> b/src/glsl/lower_ubo_reference.cpp
> index 915db6c..667a80e 100644
> --- a/src/glsl/lower_ubo_reference.cpp
> +++ b/src/glsl/lower_ubo_reference.cpp
> @@ -862,6 +862,20 @@ 
> lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
>  ir_call *
>  lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
>  {
> +   exec_list& params = ir->actual_parameters;
> +
> +   if (params.length() < 2)
> +  return ir;

It is not very relevant but if we really want to return early if the
number of parameters does not match any of the atomic functions then we
should also check if they are > 3.

Either way:
Reviewed-by: Iago Toral Quiroga 

> +   ir_rvalue *rvalue =
> +  ((ir_instruction *) params.get_head())->as_rvalue();
> +   if (!rvalue)
> +  return ir;
> +
> +   ir_variable *var = rvalue->variable_referenced();
> +   if (!var || !var->is_in_shader_storage_block())
> +  return ir;
> +
> const char *callee = ir->callee_name();
> if (!strcmp("__intrinsic_atomic_add", callee) ||
> !strcmp("__intrinsic_atomic_min", callee) ||


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] [v3] i965/skl: skip fast clears for certain surface formats

2015-11-18 Thread Pohjolainen, Topi

On Tue, Nov 17, 2015 at 05:31:12PM -0800, Ben Widawsky wrote:
> Some of the information originally in this commit message is now in the patch
> before this.
> 
> SKL adds compressible render targets and as a result mutates some of the
> programming for fast clears and resolves. There is a new internal surface type
> called the CCS. The old AUX_MCS bit becomes AUX_CCS_D. "The Auxiliary surface 
> is
> a CCS (Color Control Surface) with compression disabled or an MCS with
> compression enabled, depending on number of multisamples. MCS (Multisample

I find this wording (even though it looks to be directly from the spec)
somewhat misleading. I read it suggesting that AUX can be CCS only when
compression is disabled. I like another wording in "Auxiliary Surfaces For
Sampled Tiled Resource" better: "CCS is used to indicate that the color
surface is losslessly compressed."

Anyway the patch itself:

Reviewed-by: Topi Pohjolainen 

> Control Surface) is a special type of CCS."
> 
> The formats which are supported are defined in the table titled "Render Target
> Surface Types [SKL+]". There is no PRM yet to reference. The previously
> implemented helper function already does the right thing provided the table is
> correct.
> 
> v2: Use better English in commit message (Matt)
> s/compressable/compressible/ (Matt)
> Don't compare bools to true (Matt)
> Use the helper function and don't increase the context size - this is mostly
> implemented in the patch just before this (Chad, Neil)
> Remove an "invalid" assert (Chad)
> Fix assertion to check num_samples > 1, instead of num_samples (Chad)
> 
> v3:
> Use Matt's code as Requested-by: Chad. I didn't even look at it since Chad 
> said
> he was fine with that, and presumably Matt is fine with it.
> 
> Cc: Chad Versace 
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/brw_surface_formats.c | 52 
> -
>  src/mesa/drivers/dri/i965/gen8_surface_state.c  |  8 +++-
>  2 files changed, 33 insertions(+), 27 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c 
> b/src/mesa/drivers/dri/i965/brw_surface_formats.c
> index 16f7fec..eb981d6 100644
> --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
> +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
> @@ -90,9 +90,9 @@ struct surface_format_info {
>   */
>  const struct surface_format_info surface_formats[] = {
>  /* smpl filt shad CK  RT  AB  VB  SO  color ccs_e */
> -   SF( Y, 50,  x,  x,  Y,  Y,  Y,  Y,  x,x,   R32G32B32A32_FLOAT)
> -   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,x,   R32G32B32A32_SINT)
> -   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,x,   R32G32B32A32_UINT)
> +   SF( Y, 50,  x,  x,  Y,  Y,  Y,  Y,  x,   90,   R32G32B32A32_FLOAT)
> +   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,   90,   R32G32B32A32_SINT)
> +   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,   90,   R32G32B32A32_UINT)
> SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,x,   R32G32B32A32_UNORM)
> SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,x,   R32G32B32A32_SNORM)
> SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,x,   R64G64_FLOAT)
> @@ -109,15 +109,15 @@ const struct surface_format_info surface_formats[] = {
> SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,x,   R32G32B32_SSCALED)
> SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,x,   R32G32B32_USCALED)
> SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   R32G32B32_SFIXED)
> -   SF( Y,  Y,  x,  x,  Y, 45,  Y,  x, 60,x,   R16G16B16A16_UNORM)
> -   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x,x,   R16G16B16A16_SNORM)
> -   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,x,   R16G16B16A16_SINT)
> -   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,x,   R16G16B16A16_UINT)
> -   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x,x,   R16G16B16A16_FLOAT)
> -   SF( Y, 50,  x,  x,  Y,  Y,  Y,  Y,  x,x,   R32G32_FLOAT)
> +   SF( Y,  Y,  x,  x,  Y, 45,  Y,  x, 60,   90,   R16G16B16A16_UNORM)
> +   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x,   90,   R16G16B16A16_SNORM)
> +   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,   90,   R16G16B16A16_SINT)
> +   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x,   90,   R16G16B16A16_UINT)
> +   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x,   90,   R16G16B16A16_FLOAT)
> +   SF( Y, 50,  x,  x,  Y,  Y,  Y,  Y,  x,   90,   R32G32_FLOAT)
> SF( Y, 70,  x,  x,  Y,  Y,  Y,  Y,  x,x,   R32G32_FLOAT_LD)
> -   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,x,   R32G32_SINT)
> -   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,x,   R32G32_UINT)
> +   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,   90,   R32G32_SINT)
> +   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x,   90,   R32G32_UINT)
> SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x,x,   R32_FLOAT_X8X24_TYPELESS)
> SF( Y,  x,  x,  x,  x,  x,  x,  x,  x,x,   X32_TYPELESS_G8X24_UINT)
> SF( Y, 50,  x,  x,  x,  x,  x,  x,  x,x,   L32A32_FLOAT)
> @@ -125,7 +125,7 @@ const struct surface_format_info surface_formats[] = {
> SF( x,  x,  x,  x,  x,  x,  Y,

Re: [Mesa-dev] [PATCH v2 34/42] glsl: Check for SSBO variable in check_for_ssbo_store

2015-11-18 Thread Iago Toral

Reviewed-by: Iago Toral Quiroga 

On Tue, 2015-11-17 at 21:55 -0800, Jordan Justen wrote:
> The compiler probably already blocks this earlier on, but we should be
> checking for an SSBO here.
> 
> Signed-off-by: Jordan Justen 
> Cc: Samuel Iglesias Gonsalvez 
> Cc: Iago Toral Quiroga 
> ---
>  src/glsl/lower_ubo_reference.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/glsl/lower_ubo_reference.cpp 
> b/src/glsl/lower_ubo_reference.cpp
> index 667a80e..cf55a2e 100644
> --- a/src/glsl/lower_ubo_reference.cpp
> +++ b/src/glsl/lower_ubo_reference.cpp
> @@ -723,7 +723,7 @@ 
> lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
>return;
>  
> ir_variable *var = ir->lhs->variable_referenced();
> -   if (!var || !var->is_in_buffer_block())
> +   if (!var || !var->is_in_shader_storage_block())
>return;
>  
> /* We have a write to a buffer variable, so declare a temporary and 
> rewrite


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 20/42] glsl: Remove mem_ctx as member variable in lower_ubo_reference_visitor

2015-11-18 Thread Iago Toral

Reviewed-by: Iago Toral Quiroga 

On Tue, 2015-11-17 at 21:54 -0800, Jordan Justen wrote:
> Signed-off-by: Jordan Justen 
> Cc: Iago Toral Quiroga 
> ---
>  src/glsl/lower_ubo_reference.cpp | 64 
> +---
>  1 file changed, 34 insertions(+), 30 deletions(-)
> 
> diff --git a/src/glsl/lower_ubo_reference.cpp 
> b/src/glsl/lower_ubo_reference.cpp
> index 5082da8..2808ac1 100644
> --- a/src/glsl/lower_ubo_reference.cpp
> +++ b/src/glsl/lower_ubo_reference.cpp
> @@ -54,24 +54,23 @@ public:
> void handle_rvalue(ir_rvalue **rvalue);
> ir_visitor_status visit_enter(ir_assignment *ir);
>  
> -   void setup_for_load_or_store(ir_variable *var,
> +   void setup_for_load_or_store(void *mem_ctx,
> +ir_variable *var,
>  ir_rvalue *deref,
>  ir_rvalue **offset,
>  unsigned *const_offset,
>  bool *row_major,
>  int *matrix_columns,
>  unsigned packing);
> -   ir_expression *ubo_load(const struct glsl_type *type,
> +   ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
>  ir_rvalue *offset);
> -   ir_call *ssbo_load(const struct glsl_type *type,
> +   ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
>ir_rvalue *offset);
>  
> void check_for_ssbo_store(ir_assignment *ir);
> -   void write_to_memory(ir_dereference *deref,
> -ir_variable *var,
> -ir_variable *write_var,
> -unsigned write_mask);
> -   ir_call *ssbo_store(ir_rvalue *deref, ir_rvalue *offset,
> +   void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable 
> *var,
> +ir_variable *write_var, unsigned write_mask);
> +   ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
> unsigned write_mask);
>  
> enum {
> @@ -94,7 +93,7 @@ public:
> ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
>  ir_dereference *,
>  ir_variable *);
> -   ir_expression *emit_ssbo_get_buffer_size();
> +   ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx);
>  
> unsigned calculate_unsized_array_stride(ir_dereference *deref,
> unsigned packing);
> @@ -103,7 +102,6 @@ public:
> ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
> ir_visitor_status visit_enter(ir_call *ir);
>  
> -   void *mem_ctx;
> struct gl_shader *shader;
> struct gl_uniform_buffer_variable *ubo_var;
> ir_rvalue *uniform_block;
> @@ -242,7 +240,8 @@ interface_field_name(void *mem_ctx, char *base_name, 
> ir_rvalue *d,
>  }
>  
>  void
> -lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
> +lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
> + ir_variable *var,
>   ir_rvalue *deref,
>   ir_rvalue **offset,
>   unsigned *const_offset,
> @@ -307,7 +306,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue 
> **rvalue)
> if (!var || !var->is_in_buffer_block())
>return;
>  
> -   mem_ctx = ralloc_parent(shader->ir);
> +   void *mem_ctx = ralloc_parent(shader->ir);
>  
> ir_rvalue *offset = NULL;
> unsigned const_offset;
> @@ -322,7 +321,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue 
> **rvalue)
> /* Compute the offset to the start if the dereference as well as other
>  * information we need to configure the write
>  */
> -   setup_for_load_or_store(var, deref,
> +   setup_for_load_or_store(mem_ctx, var, deref,
> , _offset,
> _major, _columns,
> packing);
> @@ -352,7 +351,8 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue 
> **rvalue)
>  }
>  
>  ir_expression *
> -lower_ubo_reference_visitor::ubo_load(const glsl_type *type,
> +lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
> +  const glsl_type *type,
> ir_rvalue *offset)
>  {
> ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
> @@ -371,7 +371,8 @@ shader_storage_buffer_object(const _mesa_glsl_parse_state 
> *state)
>  }
>  
>  ir_call *
> -lower_ubo_reference_visitor::ssbo_store(ir_rvalue *deref,
> +lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
> +ir_rvalue *deref,
>

Re: [Mesa-dev] [PATCH] [v2] i965: Add lossless compression to surface format table

2015-11-18 Thread Pohjolainen, Topi

On Wed, Nov 18, 2015 at 11:10:12AM +0200, Pohjolainen, Topi wrote:
> On Tue, Nov 17, 2015 at 05:30:06PM -0800, Ben Widawsky wrote:
> > Background: Prior to Skylake and since Ivybridge Intel hardware has had the
> > ability to use a MCS (Multisample Control Surface) as auxiliary data in
> > "compression" operations on the surface. This reduces memory bandwidth.  
> > This
> > hardware was either used for MSAA compression, and fast clear operations.  
> > On
> 
> This says:
> 
> ... either ... , and ... 
> 
> should it have been
> 
> ... either ... or ... 
> 
> 
> All in all, I really appreciate the thorough explanation here in this commit,
> just had to check. I know I'm late with my comments, so bear with me.
> 
> > Gen8, a similar mechanism exists to allow the hiz buffer to be sampled 
> > from, and
> > therefore this feature is sometimes referred to more generally as "AUX 
> > buffers".
> > 
> > Skylake adds the ability to have the display engine directly source 
> > compressed
> > surfaces on top of the ability to sample from them. Inference dictates that
> > enabling this display features adding a restriction to the formats which 
> > could
> 
> s/adding/adds/ ?
> 
> > actually be compressed. The current set of surfaces seems to be a subset as
> > compared to previous gens (see the next patch). Also, if I had to guess I 
> > would
> > guess that future gens add support for more surface formats. To make 
> > handling
> > this a bit easier to read, and more future proof, the support for this is 
> > moved
> > into the surface formats table.
> > 
> > Along with the modifications to the table, a helper function is also 
> > provided to
> > determine if a surface is CCS compatible.  Because fast clears are currently
> > disabled on SKL, we can plumb the helper all the way through here, and not
> > actually have anything break.
> > 
> > The logic in the table works a bit differently than the other columns in the
> > table and therefore deserves a small mention. For most other features, the 
> > GEN
> 
> I have difficulty here also: the sentence compares table to other columns in
> the table ("... logic in the table ... than other columns...").
> 
> Did you mean to say that a particular _column_ in the table behaves
> differently than the others?
> 
> > which began implementing it is set, and it is assumed future gens also 
> > support
> > this. For this feature, GEN9 actually eliminates support for certain 
> > formats. We
> > could use this column to determine support for the similar feature on older
> 
> And here you refer to the newly added column? Comparing the contents of that
> column to the supported render targets (column RT) gives you the delta between
> gen9 and older?
> 
> > generation hardware. Aside from that being an error prone task which is
> > unrelated to enabling this on GEN9, it becomes somewhat tricky to implement
> > because of the fact that surface format support diminishes. You'd probably 
> > want
> > another column to cleanly implement it.
> 
> And this is what you did, right?
> 
> Again, sorry for the 20 questions.
> 
> Otherwise the patch makes sense to me:
> 
> Reviewed-by: Topi Pohjolainen 
> 
> (You probably want Chad and Neil to give their consent also).
> 
> > 
> > v2:
> > - rename ccs to ccs_e; Requested-by: Chad
> > - rename lossless_compression to lossless_compression Requested-by: Chad
> > - change meaning of brw_losslessly_compressible_format Requested-by: Chad
> >   - related changes to the code to reflect this.
> > - remove excess ccs (Chad)
> > 
> > Requested-by: Chad Versace 
> > Requested-by: Neil Roberts 
> > Signed-off-by: Ben Widawsky 
> > ---
> >  src/mesa/drivers/dri/i965/brw_context.h |   2 +
> >  src/mesa/drivers/dri/i965/brw_surface_formats.c | 525 
> > +---
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c   |   7 +-
> >  3 files changed, 282 insertions(+), 252 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> > b/src/mesa/drivers/dri/i965/brw_context.h
> > index 8d6bc19..fe45edb 100644
> > --- a/src/mesa/drivers/dri/i965/brw_context.h
> > +++ b/src/mesa/drivers/dri/i965/brw_context.h
> > @@ -1467,6 +1467,8 @@ void brw_upload_image_surfaces(struct brw_context 
> > *brw,
> >  /* brw_surface_formats.c */
> >  bool brw_render_target_supported(struct brw_context *brw,
> >   struct gl_renderbuffer *rb);
> > +bool brw_losslessly_compressible_format(struct brw_context *brw,
> > +uint32_t brw_format);
> >  uint32_t brw_depth_format(struct brw_context *brw, mesa_format format);
> >  mesa_format brw_lower_mesa_image_format(const struct brw_device_info 
> > *devinfo,
> >  mesa_format format);
> > diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c 
> >

Re: [Mesa-dev] llvm TGSI backend (WIP) questions

2015-11-18 Thread Hans de Goede


Hi,

On 13-11-15 19:51, Tom Stellard wrote:

On Fri, Nov 13, 2015 at 02:46:52PM +0100, Hans de Goede wrote:

Hi All,

So as discussed I've started working on a TGSI backend for
llvm to use as a way to get compute going on nouveau (and other gpu-s).

I'm still learning all the ins and outs of llvm so I do not have
much to show yet.

I've rebased Francisco's (curro's) latest version on top of llvm
trunk, and added a commit on top to actually get it to build with the
latest trunk. So currently I'm at the point where I've just
taken Francisco's code, and made it compile, no more and no less.

I have a git repo with this work available here:

http://cgit.freedesktop.org/~jwrdegoede/llvm/

So the next step would be to test this and see if it actually
does anything, questions:

1) Does anyone have a simple test case / command where I can
invoke just llvm and get TGSI asm output to check ?



The easiest way to do this is with the llc tool which ships with llvm.
It compiles LLVM IR to target code, which in this case is tgsi.
I would recommend taking one of the simple examples from
test/CodeGen/AMDGPU (you may need to get these from llvm trunk, not sure
what llvm version you are using).

To use llc:

llc -march=tgsi input.ll -o -


This will output TGSI.


So after some bugfixing to fix a bunch of segfaults I get:

$ bin/llc -march=tgsi ../test/CodeGen/AMDGPU/add.ll -o -

# BB#0:
UADDs TEMP0x, TEMP0x, 0
LOADgis TEMP1z, [TEMP1y]
UADDs TEMP1y, TEMP1y, 4
LOADgis TEMP1y, [TEMP1y]
UADDs TEMP1y, TEMP1z, TEMP1y
STOREgis [TEMP1x], TEMP1y
UADDs TEMP0x, TEMP0x, 0
RET
ENDSUB

and add.ll has:

;FUNC-LABEL: {{^}}test1:
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

;SI: v_add_i32_e32 [[REG:v[0-9]+]], vcc, {{v[0-9]+, v[0-9]+}}
;SI-NOT: [[REG]]
;SI: buffer_store_dword [[REG]],
define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %a = load i32, i32 addrspace(1)* %in
  %b = load i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

So the generated code for test1 resembles the input somewhat but is in no way 
correct,
e.g. I do not understand why it is assuming that both TEMP0x and TEMP1z contain 
the
address of the array with the 2 input integers. Nor do I understand why it is 
using
TEMP1z and TEMP1y as sources for the UADD, where it has been doing the LOAD-s to
TEMP0x and TEMP1y

And then we've function test2 in add.ll

;FUNC-LABEL: {{^}}test2:
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}

define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
  %result = add <2 x i32> %a, %b
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

Which completely makes the tgsi backend unhappy:

LLVM ERROR: Cannot select: t43: i32,ch = load t45:1, 
FrameIndex:i32<0>, undef:i32
t41: i32 = FrameIndex<0>
t8: i32 = undef
In function: test2

Any hints on where to start looking with fixing these issues would be much
appreciated.

Regards,

Hans
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 92985] Mac OS X build error "ar: no archive members specified"

2015-11-18 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=92985

--- Comment #1 from Emil Velikov  ---
I'm assuming that this fails as said ar expects to create a non empty archive. 
Thus moving the noinst_LTLIBRARIES += libloader_dri3_helper.la into the if
HAVE_DRI3 section should fix things.

Seems that we're also missing the XCB_DRI3_CFLAGS in the src/loader/Makefile.am

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] The i965 vec4 backend, exec_masks, and 64-bit types

2015-11-18 Thread Francisco Jerez

Connor Abbott  writes:

> On Tue, Nov 3, 2015 at 8:04 PM, Francisco Jerez  wrote:
>> Francisco Jerez  writes:
>>
>>> Connor Abbott  writes:
>>>
 Hi all,

 While working on FP64 for i965, there's an issue that I thought of
 with the vec4 backend that I'm not sure how to resolve. From what I
 understand, the execmask works the same way in Align16 mode as Align1
 mode, except that you only use the first 8 channels in practice for
 SIMD4x2, and the first four channels are always the same as well as
 the last 4 channels. But this doesn't work for 64-bit things, since
 there we only operate on 4 components at the same time, so it's more
 like SIMD2x2. For example, imagine that only the second vertex is
 currently enabled at the moment. Then the execmask looks like
 , and if we do something like:

 mul(4)  g24<1>DF g12<4,4,1>DF g13<4,4,1>DF { align16 };

 then all 4 channels will be disabled, which is not what we want.

>>> AFAIUI this shouldn't be a problem.  In align16 mode each component of
>>> an instruction with double-precision execution type maps to *two* bits
>>> of the execmask instead of one (one for each 32-bit half), which is
>>> compensated by each logical thread having two components instead of
>>> four, so in your example [assuming  is little-endian notation
>>> and you actually do 'mul(8)' ;)] the x and y components of the first
>>> logical thread will be disabled while the x and y components of the
>>> second logical thread will be enabled.
>
> That certainly makes sense... I just couldn't find a doc reference to
> confirm or deny it.
>
>>>
>>
>> I've had a look into the simulator's behaviour, and in fact HSW+ seem to
>> sort of support actual SIMD4x2 on DF types, so when you do stuff like
>>
>> | mul(8)   g24.xyzw:dfg12<4>.xyzw:df  g12<4>.xyzw:df { align16 };
>>
>> it will actually write 8 double floats to g24-25 (using a nibble from
>> the execmask for each vec4), which contradicts the hardware spec:
>>
>> | IVB+
>> |
>> | In Align16 mode, all regioning parameters must use the syntax of a pair
>> | of packed floats, including channel selects and channel enables.
>> |
>> | // Example:
>> | mov (8) r10.0.xyzw:df r11.0.xyzw:df
>> | // The above instruction moves four double floats. The .x picks the
>> | // low 32 bits and the .y picks the high 32 bits of the double float.
>>
>> (I believe the quotation above may only apply to IVB even though it's
>>  marked IVB+).
>
> Thanks for looking into this. Indeed, at least on BDW the exec_size
> does need to be divided by 2 (I have a patch on my branch that does
> this, and it fixed a number of piglit tests). That's why the example I
> wrote had an exec_size of 4.
>

Uhm...  The thing is that on HSW+ you get 4 actual FP64 channels per
vertex, so if you set the execution size to 4 only the channels of the
first vertex will be executed and you'll definitely run into the problem
you described in your original e-mail.  IOW the execution size needs to
be 8 on HSW+ for the channel enables to be applied correctly unless you
use NoMask and apply the channel enables later on using moves, or split
the instruction in half and use NibCtrl to select the right channel
enable signals as you suggested earlier.

(Sorry for the late reply BTW, I was on vacation last week).

>>
>> Now the really weird thing I've noticed: A DF Align16 instruction with
>> writemask XY will actually write components XZ of each vec4, and
>> writemask ZW actually writes components YW (!).  Other writemasks seem
>> to behave normally (including all scalar ones).  I haven't found any
>> mention of this in the docs, but a quick test on real hardware confirms
>> the simulator's behaviour.
>
> Ugh, really... that sucks :/
>
>>
>> Swizzles OTOH still shuffle individual 32-bit fields and are extended
>> cyclically into the ZW components of the instruction (how useful).
>>
>> I wonder if we would be better off scalarizing all FP64 code...
>
> Yeah, maybe we could get away with putting each component into a
> separate register, and always using XYZW writemasks... but we'd still
> need to pack two things into a single dvec2 for e.g. SSBO's, so it
> wouldn't work there. We don't support them today, although I'm still
> not 100% sure we can always get rid of all the packing operations...
> and relying on the optimizations to get rid of them seems kinda
> fragile. We could make dvec2() work using normal 32-bit MOV's,
> although at that point it might be easier not to scalarize and instead
> have double operations output to temporaries and then use a 32-bit MOV
> to apply the right writemask.
>
Relying on optimizations to get rid of packing sounds reasonable to me,
the packing could be done using an Align1 move like:

 mov (8) r0.0<1>:d r1.0<4,2,1>:d

Another alternative would be to emit actual dvec4 instructions (with any

[Mesa-dev] [Bug 92985] Mac OS X build error "ar: no archive members specified"

2015-11-18 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=92985

--- Comment #2 from Martin Peres  ---
(In reply to Emil Velikov from comment #1)
> I'm assuming that this fails as said ar expects to create a non empty
> archive. 
> Thus moving the noinst_LTLIBRARIES += libloader_dri3_helper.la into the if
> HAVE_DRI3 section should fix things.
> 
> Seems that we're also missing the XCB_DRI3_CFLAGS in the
> src/loader/Makefile.am

Pretty sure I tested that creating an empty libloader_dri3_helper.la was no
problem on Linux. It may be on Mac OS.

In any case, I will add it to my list of stuff to do tomorrow, after figuring
out why kwin fails when using EGL (spent some time on it today, the EGLConfig
is empty(??)) and fixed the export of the symbols (unless Emil wants to do it
first).

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] i965: Allow indirect GS input indexing in the scalar backend.

2015-11-18 Thread Kristian Høgsberg

On Sat, Nov 7, 2015 at 9:04 PM, Kenneth Graunke  wrote:
> This allows arbitrary non-constant indices on GS input arrays,
> both for the vertex index, and any array offsets beyond that.
>
> All indirects are handled via the pull model.  We could potentially
> handle indirect addressing of pushed data as well, but it would add
> additional code complexity, and we usually have to pull inputs anyway
> due to the sheer volume of input data.  Plus, marking pushed inputs
> as live due to indirect addressing could exacerbate register pressure
> problems pretty badly.  We'd need to be careful.

I like how this gets rid of the loop to rewrite
SHADER_OPCODE_URB_READ_SIMD8 insts in assign_gs_urb_setup().

This looks good - assuming rebase on master and update to use
SHADER_OPCODE_MOV_INDIRECT,

Reviewed-by: Kristian Høgsberg 

> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp |  17 
>  src/mesa/drivers/dri/i965/brw_fs.h   |   3 +-
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 129 
> ---
>  src/mesa/drivers/dri/i965/brw_shader.cpp |   3 +
>  4 files changed, 106 insertions(+), 46 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index ee10c9d..9d00379 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1636,24 +1636,7 @@ fs_visitor::assign_gs_urb_setup()
> first_non_payload_grf +=
>8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in;
>
> -   const unsigned first_icp_handle = payload.num_regs -
> -  (vue_prog_data->include_vue_handles ? nir->info.gs.vertices_in : 0);
> -
> foreach_block_and_inst(block, fs_inst, inst, cfg) {
> -  /* Lower URB_READ_SIMD8 opcodes into real messages. */
> -  if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8) {
> - assert(inst->src[0].file == IMM);
> - inst->src[0] = retype(brw_vec8_grf(first_icp_handle +
> -inst->src[0].fixed_hw_reg.dw1.ud,
> -0), BRW_REGISTER_TYPE_UD);
> - /* for now, assume constant - we can do per-slot offsets later */
> - assert(inst->src[1].file == IMM);
> - inst->offset = inst->src[1].fixed_hw_reg.dw1.ud;
> - inst->src[1] = fs_reg();
> - inst->mlen = 1;
> - inst->base_mrf = -1;
> -  }
> -
>/* Rewrite all ATTR file references to HW_REGs. */
>convert_attr_sources_to_hw_regs(inst);
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index fb70f0c..67f9f59 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -302,7 +302,8 @@ public:
> unsigned stream_id);
> void emit_gs_thread_end();
> void emit_gs_input_load(const fs_reg , const nir_src _src,
> -   unsigned offset, unsigned num_components);
> +   const fs_reg _offset, unsigned 
> imm_offset,
> +   unsigned num_components);
> void emit_cs_terminate();
> fs_reg *emit_cs_local_invocation_id_setup();
> fs_reg *emit_cs_work_group_id_setup();
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 7f033f2..3bc02c5 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1543,42 +1543,112 @@ fs_visitor::emit_gs_vertex(const nir_src 
> _count_nir_src,
>  void
>  fs_visitor::emit_gs_input_load(const fs_reg ,
> const nir_src _src,
> -   unsigned input_offset,
> +   const fs_reg _offset,
> +   unsigned imm_offset,
> unsigned num_components)
>  {
> -   const brw_vue_prog_data *vue_prog_data = (const brw_vue_prog_data *) 
> prog_data;
> -   const unsigned vertex = nir_src_as_const_value(vertex_src)->u[0];
> +   struct brw_gs_prog_data *gs_prog_data = (struct brw_gs_prog_data *) 
> prog_data;
>
> -   const unsigned array_stride = vue_prog_data->urb_read_length * 8;
> +   /* Offset 0 is the VUE header, which contains VARYING_SLOT_LAYER [.y],
> +* VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w].  Only
> +* gl_PointSize is available as a GS input, however, so it must be that.
> +*/
> +   const bool is_point_size =
> +  indirect_offset.file == BAD_FILE && imm_offset == 0;
> +
> +   nir_const_value *vertex_const = nir_src_as_const_value(vertex_src);
> +   const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8;
> +
> +   if (indirect_offset.file == BAD_FILE && vertex_const != NULL &&
> +   4 * imm_offset < push_reg_count) {
> +  imm_offset = 4 * imm_offset + vertex_const->u[0] *

[Mesa-dev] [PATCH 2/2] nir: extract out helper macros for running passes

2015-11-18 Thread Rob Clark

From: Rob Clark 

Note these are a bit uglier, due to avoidance of GNU C extensions.  But
drivers which do not need to be built with compilers that don't support
the extension can wrap these macros with their own.

Signed-off-by: Rob Clark 
---
 src/glsl/nir/nir.h  | 35 ++
 src/mesa/drivers/dri/i965/brw_nir.c | 38 ++---
 2 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index c2a46ea..b24fa83 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1936,12 +1936,47 @@ nir_shader_mutable(nir_shader *shader)
 void nir_validate_shader(nir_shader *shader);
 void nir_metadata_set_validation_flag(nir_shader *shader);
 void nir_metadata_check_validation_flag(nir_shader *shader);
+
+#include "util/debug.h"
+static inline bool
+should_clone_nir(void)
+{
+   static int should_clone = -1;
+   if (should_clone < 1)
+  should_clone = env_var_as_boolean("NIR_TEST_CLONE", false);
+
+   return should_clone;
+}
 #else
 static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
 static inline void nir_metadata_set_validation_flag(nir_shader *shader) { 
(void) shader; }
 static inline void nir_metadata_check_validation_flag(nir_shader *shader) { 
(void) shader; }
+static inline bool should_clone_nir(void) { return false; }
 #endif /* DEBUG */
 
+#define _PASS(nir, do_pass) do { \
+   assert(nir_shader_is_mutable(nir));   \
+   do_pass   \
+   nir_validate_shader(nir); \
+   if (should_clone_nir()) { \
+  nir_shader *clone = nir_shader_clone(nir); \
+  nir_shader_unref(nir); \
+  nir = clone;   \
+   } \
+} while (0)
+
+#define NIR_PASS(progress, nir, pass, ...) _PASS(nir,\
+   nir_metadata_set_validation_flag(nir);\
+   if (pass(nir, ##__VA_ARGS__)) {   \
+  progress = true;   \
+  nir_metadata_check_validation_flag(nir);   \
+   } \
+)
+
+#define NIR_PASS_V(nir, pass, ...) _PASS(nir,\
+   pass(nir, ##__VA_ARGS__); \
+)
+
 void nir_calc_dominance_impl(nir_function_impl *impl);
 void nir_calc_dominance(nir_shader *shader);
 
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c 
b/src/mesa/drivers/dri/i965/brw_nir.c
index 4c5e036..05d5f82 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -171,42 +171,8 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
}
 }
 
-#include "util/debug.h"
-
-static bool
-should_clone_nir()
-{
-   static int should_clone = -1;
-   if (should_clone < 1)
-  should_clone = env_var_as_boolean("NIR_TEST_CLONE", false);
-
-   return should_clone;
-}
-
-#define _OPT(do_pass) (({\
-   bool this_progress = true;\
-   do_pass   \
-   nir_validate_shader(nir); \
-   if (should_clone_nir()) { \
-  nir_shader *clone = nir_shader_clone(nir); \
-  ralloc_free(nir);  \
-  nir = clone;   \
-   } \
-   this_progress;\
-}))
-
-#define OPT(pass, ...) _OPT(   \
-   nir_metadata_set_validation_flag(nir);  \
-   this_progress = pass(nir ,##__VA_ARGS__);   \
-   if (this_progress) {\
-  progress = true; \
-  nir_metadata_check_validation_flag(nir); \
-   }   \
-)
-
-#define OPT_V(pass, ...) _OPT( \
-   pass(nir, ##__VA_ARGS__);   \
-)
+#define OPT(pass, ...)   NIR_PASS(progress, nir, pass, ##__VA_ARGS__)
+#define OPT_V(pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
 
 static nir_shader *
 nir_optimize(nir_shader *nir, bool is_scalar)
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] i965/fs: Add support for gl_HelperInvocation system value.

2015-11-18 Thread Matt Turner

On Wed, Nov 18, 2015 at 3:09 PM, Ilia Mirkin  wrote:
> On Wed, Nov 18, 2015 at 6:06 PM, Matt Turner  wrote:
>> In most cases (when the negate is copy propagated and the MOV removed),
>> this is two instructions on Gen >= 8 and only three instructions on
>> earlier platforms -- and it doesn't use the flag register.
>> ---
>> Thanks Ilia!
>>
>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 52 
>> 
>>  1 file changed, 52 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index c282f83..5a5b1d9 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -250,6 +250,57 @@ emit_system_values_block(nir_block *block, void 
>> *void_visitor)
>>  *reg = *v->emit_cs_work_group_id_setup();
>>   break;
>>
>> +  case nir_intrinsic_load_helper_invocation:
>> + assert(v->stage == MESA_SHADER_FRAGMENT);
>> + reg = >nir_system_values[SYSTEM_VALUE_HELPER_INVOCATION];
>> + if (reg->file == BAD_FILE) {
>> +const fs_builder abld =
>> +   v->bld.annotate("gl_HelperInvocation", NULL);
>> +
>> +/* On Gen6+ (gl_HelperInvocation is only exposed on Gen7+) the
>> + * pixel mask is in g1.7 of the thread payload.
>> + *
>> + * We move the per-channel pixel enable bit to the low bit of 
>> each
>> + * channel by shifting the byte containing the pixel mask by the
>> + * vector immediate 0x76543210UV.
>> + *
>> + * The region of <1,8,0> reads only 1 byte (the pixel masks for
>> + * subspans 0 and 1) in SIMD8 and an additional byte (the pixel
>> + * masks for 2 and 3) in SIMD16.
>> + */
>> +fs_reg shifted = abld.vgrf(BRW_REGISTER_TYPE_UW, 1);
>> +abld.SHR(shifted,
>> + stride(byte_offset(retype(brw_vec1_grf(1, 0),
>> +   BRW_REGISTER_TYPE_UB), 28),
>> +1, 8, 0),
>> + brw_imm_uv(0x76543210));
>> +
>> +/* A set bit in the pixel mask means the channel is enabled, but
>> + * that is the opposite of gl_HelperInvocation so we need to 
>> invert
>> + * the mask.
>> + *
>> + * The negate source-modifier bit of logical instructions on 
>> Gen8+
>> + * performs 1's complement negation, so we can use that instead 
>> of
>> + * a NOT instruction.
>> + */
>> +fs_reg inverted  = negate(shifted);
>
> Perhaps a single space before the = is enough here? [Can fix up before
> pushing after someone competent reviews...]

Thanks for catching that -- it is indeed a mistake.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/8] Implement EXT_shader_samples_identical

2015-11-18 Thread Jason Ekstrand

On Wed, Nov 18, 2015 at 4:23 PM, Kenneth Graunke  wrote:
> On Wednesday, November 18, 2015 03:46:46 PM Ian Romanick wrote:
>> This patch series implements a new GL extension,
>> EXT_shader_samples_identical.  This extension allows shaders to
>> determine when all of the samples in a particular texel are the same.
>> This takes advantage of the way compressed multisample surfaces are
>> stored on modern Intel and AMD hardware.  This enables optimizations in
>> application multisample resolve filters, etc.
>>
>> I really wanted to get this in the next Mesa release.  For some reason,
>> I thought the branch point was after Thanksgiving (which is next
>> Thursday).  Ken reminded me yesterday that the branch point is actually
>> this Friday. :( As a result, I'm sending it out today to get review as
>> soon as possible.
>>
>> I also wanted to get as much time as possible for other drivers to get
>> implementations.  I worked with Graham Sellers on this extension, and he
>> assures me that the implementation on modern Radeons is trivial.  My
>> expectation is that it should be about the same as the Intel
>> implementation.
>>
>> There will be some extra TGSI bits needed, but that should also be
>> trivial.  For the NIR and i965 backend bits, I mostly copied and blended
>> the implementations of txf_ms and query_samples.
>>
>> There are currently only trivial piglit tests, but I am working on more.
>> I basically hacked up tests/spec/arb_texture_multisample/texelfetch.c to
>> use the extension to render different colors based on whether
>> textureSamplesIdenticalEXT returned true or false.  The resulting image
>> and the generated assembly look good.  My plan is to get a set of real
>> tests out by midday tomorrow.
>>
>> As soon as we're confident that the spec is good, I'll submit it to
>> Khronos for publication in the registry.  I'm still waiting on feedback
>> from another closed-source driver writer.
>
> Other than the few comments I had, this looks good to me.
>
> Series is:
> Reviewed-by: Kenneth Graunke 
>
> I look forward to seeing it in action in meta_blit.c.
>
> It sounds like Chris plans to review it as well, which is great.

I made a few comments on the last three patches, only one of which (on
the last patch) is really critical.  The rest we can clean up later if
you don't want to deal with it now.
--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 8/8] i965: Enable EXT_shader_samples_identical

2015-11-18 Thread Jason Ekstrand

On Wed, Nov 18, 2015 at 5:31 PM, Ian Romanick  wrote:
> On 11/18/2015 05:02 PM, Jason Ekstrand wrote:
>> On Wed, Nov 18, 2015 at 4:06 PM, Kenneth Graunke  
>> wrote:
>>> On Wednesday, November 18, 2015 03:46:54 PM Ian Romanick wrote:
 From: Ian Romanick 

 Signed-off-by: Ian Romanick 
 ---
  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   |  1 +
  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 16 
  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp |  1 +
  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 +++
  src/mesa/drivers/dri/i965/intel_extensions.c   |  1 +
  5 files changed, 30 insertions(+)

 diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 index 1f71f66..4af1234 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 @@ -2550,6 +2550,7 @@ fs_visitor::nir_emit_texture(const fs_builder , 
 nir_tex_instr *instr)
   switch (instr->op) {
   case nir_texop_txf:
   case nir_texop_txf_ms:
 + case nir_texop_samples_identical:
  coordinate = retype(src, BRW_REGISTER_TYPE_D);
  break;
   default:
 diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 index a7bd9ce..6688f6a 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 @@ -259,6 +259,22 @@ fs_visitor::emit_texture(ir_texture_opcode op,
lod = fs_reg(0u);
 }

 +   if (op == ir_samples_identical) {
 +  fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 1, 
 1));
 +
 +  if (mcs.file == BRW_IMMEDIATE_VALUE) {
 + fs_reg tmp = vgrf(glsl_type::uint_type);
 +
 + bld.MOV(tmp, mcs);
 + bld.CMP(dst, tmp, src_reg(0u), BRW_CONDITIONAL_EQ);
>>>
>>> Seems a little strange to emit assembly to do the comparison when
>>> you've already determined that the value is a compile time constant.
>>>
>>> Why not just:
>>>
>>>bld.MOV(dst, fs_reg(mcs.ud == 0u ? ~0u : 0u));
>>
>> Actually, getting an immediate here means we don't have an MCS and we
>> have no idea if the samples are identical, so we should return false
>> always.
>
> Derp.  Yeah, that's true.
>
 +  } else {
 + bld.CMP(dst, mcs, src_reg(0u), BRW_CONDITIONAL_EQ);
>>
>> We should also consider handling the clear color case.  In this case,
>> we'll get 0xff for 2x and 0xffffffff for 4x or 8x.  Do we know the
>> number of samples in the shader?  We should be able to get that from
>> the sampler or something but then we would have to pass that through
>> the key and that would get gross.
>
> Does that only apply to clear colors that are compatible with
> fast-clear?  In my simple test, it appears that the cleared area returns
> all zeros.

Yes, that means fast-clear color.

>> One other thought, 16x MSAA will break all this because it gives you a
>> ivec4 value from the MCS (if I remember correctly).  Not sure if we've
>> landed 16x MSAA yet though.
>
> Oof.  I think it has, but I don't think I have a 16x-compatible
> platform.  I guess we could always return false for now if the sampler
> is 16x.

That would work for now.  That said, it wouldn't be that hard to loop
over the result and ensure all values are zero.  However, that
requires that we know how many samples we have.

>> --Jason
>>
 +  }
 +
 +  this->result = dst;
 +  return;
 +   }
 +
 if (coordinate.file != BAD_FILE) {
/* FINISHME: Texture coordinate rescaling doesn't work with 
 non-constant
 * samplers.  This should only be a problem with GL_CLAMP on Gen7.
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
 index 3c2674d..41c3c10 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
 @@ -1615,6 +1615,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
   switch (instr->op) {
   case nir_texop_txf:
   case nir_texop_txf_ms:
 + case nir_texop_samples_identical:
  coordinate = get_nir_src(instr->src[i].src, 
 BRW_REGISTER_TYPE_D,
   src_size);
  coord_type = glsl_type::ivec(src_size);
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 index fda3d7c..2190a86 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 @@ -909,6 +909,17 @@

Re: [Mesa-dev] [PATCH 2/2] mesa: Add test for sorted extension table

2015-11-18 Thread Ian Romanick

On 11/18/2015 03:01 PM, Nanley Chery wrote:
> From: Nanley Chery 
> 
> Enable developers to know if the table's alphabetical sorting
> is maintained or lost.

I like this in principle, but let's be honest.  Almost all of the time,
the people who don't sort (by whatever order we agree) also don't run
'make check'. :(

> Signed-off-by: Nanley Chery 
> ---
>  src/mesa/main/extensions.h  |  1 +
>  src/mesa/main/tests/Makefile.am |  1 +
>  src/mesa/main/tests/mesa_extensions.cpp | 47 
> +
>  3 files changed, 49 insertions(+)
>  create mode 100644 src/mesa/main/tests/mesa_extensions.cpp
> 
> diff --git a/src/mesa/main/extensions.h b/src/mesa/main/extensions.h
> index 1615e1c..7114c96 100644
> --- a/src/mesa/main/extensions.h
> +++ b/src/mesa/main/extensions.h
> @@ -85,6 +85,7 @@ enum {
>  #define EXT(name_str, ...) MESA_EXTENSION_##name_str,
>  #include "extensions_table.h"
>  #undef EXT
> +   MESA_EXTENSION_COUNT
>  };

In partial response to Ilia's and Emil's feedback... this test doesn't
really care about extensions.h.  It cares about the data in
extensions_table.h.  You could instead include extensions_table.h
directly in the test.  Then you could define EXT() to be whatever you
want, use ARRAY_SIZE, etc.  That way if we changed the format of
_mesa_extension_table (say, changed name to ExtensionName), we wouldn't
have to modify the test.

I don't have a strong opinion.

> diff --git a/src/mesa/main/tests/Makefile.am b/src/mesa/main/tests/Makefile.am
> index bd7ab73..d6977e2 100644
> --- a/src/mesa/main/tests/Makefile.am
> +++ b/src/mesa/main/tests/Makefile.am
> @@ -27,6 +27,7 @@ AM_CPPFLAGS += -DHAVE_SHARED_GLAPI
>  main_test_SOURCES += \
>   dispatch_sanity.cpp \
>   mesa_formats.cpp\
> + mesa_extensions.cpp \
>   program_state_string.cpp
>  
>  main_test_LDADD += \
> diff --git a/src/mesa/main/tests/mesa_extensions.cpp 
> b/src/mesa/main/tests/mesa_extensions.cpp
> new file mode 100644
> index 000..5154ae1
> --- /dev/null
> +++ b/src/mesa/main/tests/mesa_extensions.cpp
> @@ -0,0 +1,47 @@
> +/*
> + * Copyright © 2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +
> +/**
> + * \name mesa_extensions.cpp
> + *
> + * Verify that the extensions table is sorted.
> + *

Delete the empty " *" line.

> + */
> +
> +#include 
> +#include "main/mtypes.h"
> +#include "main/extensions.h"
> +
> +/**
> + * Debug/test: verify the extension table is alphabetically sorted.
> + */
> +TEST(MesaExtensionsTest, AlphabeticallySorted)
> +{
> +   for (int i = 0; i < MESA_EXTENSION_COUNT - 1; ++i) {
> +  const char * current_str = _mesa_extension_table[i].name;
> +  const char * next_str = _mesa_extension_table[i+1].name;
> +
> +  /* We expect the extension table to be alphabetically sorted */
> +  ASSERT_LT(strcmp(current_str, next_str), 0);
> +   }
> +}
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 6/8 v2] nir: Add nir_texop_samples_identical opcode

2015-11-18 Thread Ian Romanick

From: Ian Romanick 

This is the NIR analog to GLSL IR ir_samples_identical.  However, the
NIR has an extra source.  This is a fake sample index with a type
nir_tex_src_ms_index.  This enables backends to (likely) share more code
with the existing nir_texop_txf_ms implementation.

v2: Don't add the second nir_tex_src_ms_index parameter.  Suggested by
Ken and Jason.

Signed-off-by: Ian Romanick 
---
 src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 3 +++
 src/glsl/nir/glsl_to_nir.cpp | 6 ++
 src/glsl/nir/nir.h   | 4 
 src/glsl/nir/nir_print.c | 4 +++-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 1 +
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp   | 1 +
 6 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 
b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 157dc73..0f5c7e9 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1624,6 +1624,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
case nir_texop_tg4:
case nir_texop_query_levels:
case nir_texop_texture_samples:
+   case nir_texop_samples_identical:
compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op);
return;
}
@@ -1889,6 +1890,8 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr)
case nir_texop_query_levels:
emit_tex_query_levels(ctx, tex);
break;
+   case nir_texop_samples_identical:
+   unreachable("nir_texop_samples_identical");
default:
emit_tex(ctx, tex);
break;
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index e149d73..18ef490 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -1798,6 +1798,11 @@ nir_visitor::visit(ir_texture *ir)
   num_srcs = 0;
   break;
 
+   case ir_samples_identical:
+  op = nir_texop_samples_identical;
+  num_srcs = 1; /* coordinate */
+  break;
+
default:
   unreachable("not reached");
}
@@ -1825,6 +1830,7 @@ nir_visitor::visit(ir_texture *ir)
case GLSL_TYPE_INT:
   instr->dest_type = nir_type_int;
   break;
+   case GLSL_TYPE_BOOL:
case GLSL_TYPE_UINT:
   instr->dest_type = nir_type_unsigned;
   break;
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index e9d722e..6837ec2 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -955,6 +955,9 @@ typedef enum {
nir_texop_tg4,/**< Texture gather */
nir_texop_query_levels,   /**< Texture levels query */
nir_texop_texture_samples,/**< Texture samples query */
+   nir_texop_samples_identical,  /**< Query whether all samples are definitely
+  * identical.
+  */
 } nir_texop;
 
 typedef struct {
@@ -1028,6 +1031,7 @@ nir_tex_instr_dest_size(nir_tex_instr *instr)
 
case nir_texop_texture_samples:
case nir_texop_query_levels:
+   case nir_texop_samples_identical:
   return 1;
 
default:
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index f7f5fdf..fa2f321 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -512,7 +512,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
case nir_texop_texture_samples:
   fprintf(fp, "texture_samples ");
   break;
-
+   case nir_texop_samples_identical:
+  fprintf(fp, "samples_identical ");
+  break;
default:
   unreachable("Invalid texture operation");
   break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index ebdcb3a..80315fe 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2732,6 +2732,7 @@ fs_visitor::nir_emit_texture(const fs_builder , 
nir_tex_instr *instr)
   inst->base_mrf = -1;
   return;
}
+   case nir_texop_samples_identical: op = ir_samples_identical; break;
default:
   unreachable("unknown texture opcode");
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 27933d7..3c2674d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1545,6 +1545,7 @@ ir_texture_opcode_for_nir_texop(nir_texop texop)
case nir_texop_txf_ms: op = ir_txf_ms; break;
case nir_texop_txl: op = ir_txl; break;
case nir_texop_txs: op = ir_txs; break;
+   case nir_texop_samples_identical: op = ir_samples_identical; break;
default:
   unreachable("unknown texture opcode");
}
-- 
2.1.0

___

[Mesa-dev] [PATCH 7/8 v2] i965/vec4: Handle nir_tex_src_ms_index more like the scalar

2015-11-18 Thread Ian Romanick

From: Ian Romanick 

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 3c2674d..c93227c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1654,14 +1654,6 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
 
   case nir_tex_src_ms_index: {
  sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
- assert(coord_type != NULL);
- if (devinfo->gen >= 7 &&
- key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
-mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
- } else {
-mcs = src_reg(0u);
- }
- mcs = retype(mcs, BRW_REGISTER_TYPE_UD);
  break;
   }
 
@@ -1703,6 +1695,16 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
   }
}
 
+   if (instr->op == nir_texop_txf_ms) {
+  assert(coord_type != NULL);
+  if (devinfo->gen >= 7 &&
+  key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
+ mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
+  } else {
+ mcs = src_reg(0u);
+  }
+   }
+
uint32_t constant_offset = 0;
for (unsigned i = 0; i < 3; i++) {
   if (instr->const_offset[i] != 0) {
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 8/8 v2] i965: Enable EXT_shader_samples_identical

2015-11-18 Thread Ian Romanick

From: Ian Romanick 

v2: Handle immediate value for MCS smarter.  Rebase on changes to
nir_texop_samples_identical (missing second parameter).  Suggested by
Jason.  This still doesn't handle the 16x MSAA case.

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp   |  4 +++-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 16 
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp |  4 +++-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 +++
 src/mesa/drivers/dri/i965/intel_extensions.c   |  1 +
 5 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 80315fe..c85c9fc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2624,6 +2624,7 @@ fs_visitor::nir_emit_texture(const fs_builder , 
nir_tex_instr *instr)
  switch (instr->op) {
  case nir_texop_txf:
  case nir_texop_txf_ms:
+ case nir_texop_samples_identical:
 coordinate = retype(src, BRW_REGISTER_TYPE_D);
 break;
  default:
@@ -2686,7 +2687,8 @@ fs_visitor::nir_emit_texture(const fs_builder , 
nir_tex_instr *instr)
   }
}
 
-   if (instr->op == nir_texop_txf_ms) {
+   if (instr->op == nir_texop_txf_ms ||
+   instr->op == nir_texop_samples_identical) {
   if (devinfo->gen >= 7 &&
   key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
  mcs = emit_mcs_fetch(coordinate, instr->coord_components, 
sampler_reg);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index a7bd9ce..66cbbd2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -259,6 +259,22 @@ fs_visitor::emit_texture(ir_texture_opcode op,
   lod = fs_reg(0u);
}
 
+   if (op == ir_samples_identical) {
+  fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 1, 1));
+
+  /* If mcs is an immediate value, it means there is no MCS.  In that case
+   * just return false.
+   */
+  if (mcs.file == BRW_IMMEDIATE_VALUE) {
+ bld.MOV(dst, fs_reg(0));
+  } else {
+ bld.CMP(dst, mcs, src_reg(0u), BRW_CONDITIONAL_EQ);
+  }
+
+  this->result = dst;
+  return;
+   }
+
if (coordinate.file != BAD_FILE) {
   /* FINISHME: Texture coordinate rescaling doesn't work with non-constant
* samplers.  This should only be a problem with GL_CLAMP on Gen7.
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index c93227c..98a4d3b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1615,6 +1615,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
  switch (instr->op) {
  case nir_texop_txf:
  case nir_texop_txf_ms:
+ case nir_texop_samples_identical:
 coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D,
  src_size);
 coord_type = glsl_type::ivec(src_size);
@@ -1695,7 +1696,8 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
   }
}
 
-   if (instr->op == nir_texop_txf_ms) {
+   if (instr->op == nir_texop_txf_ms ||
+   instr->op == nir_texop_samples_identical) {
   assert(coord_type != NULL);
   if (devinfo->gen >= 7 &&
   key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index fda3d7c..d8a0f22 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -909,6 +909,17 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
   unreachable("TXB is not valid for vertex shaders.");
case ir_lod:
   unreachable("LOD is not valid for vertex shaders.");
+   case ir_samples_identical: {
+  /* If mcs is an immediate value, it means there is no MCS.  In that case
+   * just return false.
+   */
+  if (mcs.file == BRW_IMMEDIATE_VALUE) {
+ emit(MOV(dest, src_reg(0u)));
+  } else {
+ emit(CMP(dest, mcs, src_reg(0u), BRW_CONDITIONAL_EQ));
+  }
+  return;
+   }
default:
   unreachable("Unrecognized tex op");
}
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 386b63c..2e2459c 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -333,6 +333,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_texture_compression_bptc = true;
   ctx->Extensions.ARB_texture_view = true;
   ctx->Extensions.ARB_shader_storage_buffer_object = true;
+

Re: [Mesa-dev] [PATCH 1/2] mesa/extensions: Sort the extension table alphabetically

2015-11-18 Thread Ilia Mirkin

On Wed, Nov 18, 2015 at 6:47 PM, Matt Turner  wrote:
> On Wed, Nov 18, 2015 at 3:01 PM, Nanley Chery  wrote:
>> From: Nanley Chery 
>>
>> Make it easier to determine where to add new extensions.
>> Performed with the vim sort command.
>
> Well, I think I'm obligated to review such a patch :)
>
>> Signed-off-by: Nanley Chery 
>> ---
>>  src/mesa/main/extensions_table.h | 173 
>> +--
>>  1 file changed, 92 insertions(+), 81 deletions(-)
>>
>> diff --git a/src/mesa/main/extensions_table.h 
>> b/src/mesa/main/extensions_table.h
>> index d12fd9f..4b42863 100644
>> --- a/src/mesa/main/extensions_table.h
>> +++ b/src/mesa/main/extensions_table.h
>> @@ -1,8 +1,30 @@
>> +/* The extension table is alphabetically sorted by the extension name 
>> string column. */
>> +
>>  #define GLL 0
>>  #define GLC 0
>>  #define ES1 0
>>  #define ES2 0
>>  #define  x ~0
>
> I'd probably put a blank line between the last #define and the first EXT(...)
>
> I trust sort, so

Be careful who you trust:

$ LC_ALL=en_US.utf8 sort
A
a
a
A
$ LC_ALL=C sort
A
a
A
a

There's an implicit ^D which I typed after the second line...
hopefully we're not dealing with locale-sensitive functions.

>
> Reviewed-by: Matt Turner 
>
>> +EXT(3DFX_texture_compression_FXT1   , TDFX_texture_compression_FXT1 
>>  , GLL, GLC,  x ,  x , 1999)
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 7/8] i965/fs: Handle nir_tex_src_ms_index more like the vec4

2015-11-18 Thread Ian Romanick

On 11/18/2015 04:57 PM, Jason Ekstrand wrote:
> On Wed, Nov 18, 2015 at 4:07 PM, Kenneth Graunke  
> wrote:
>> On Wednesday, November 18, 2015 03:46:53 PM Ian Romanick wrote:
>>> From: Ian Romanick 
>>>
>>> Signed-off-by: Ian Romanick 
>>> ---
>>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 +++-
>>>  1 file changed, 7 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
>>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> index cdd470b..1f71f66 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> @@ -2579,6 +2579,13 @@ fs_visitor::nir_emit_texture(const fs_builder , 
>>> nir_tex_instr *instr)
>>>   break;
>>>case nir_tex_src_ms_index:
>>>   sample_index = retype(src, BRW_REGISTER_TYPE_UD);
>>> + if (devinfo->gen >= 7 &&
>>> + key_tex->compressed_multisample_layout_mask & (1 << sampler)) 
>>> {
>>> +mcs = emit_mcs_fetch(coordinate, instr->coord_components, 
>>> sampler_reg);
>>> + } else {
>>> +mcs = fs_reg(0u);
>>> + }
>>> + mcs = retype(mcs, BRW_REGISTER_TYPE_UD);
>>
>> No need for this line - emit_mcs_fetch already returns a UD register,
>> and fs_reg(0u) is already UD as well.
>>
>>>   break;
>>>case nir_tex_src_offset:
>>>   tex_offset = retype(src, BRW_REGISTER_TYPE_D);
>>> @@ -2612,15 +2619,6 @@ fs_visitor::nir_emit_texture(const fs_builder , 
>>> nir_tex_instr *instr)
>>>}
>>> }
>>>
>>> -   if (instr->op == nir_texop_txf_ms) {
>>> -  if (devinfo->gen >= 7 &&
>>> -  key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
>>> - mcs = emit_mcs_fetch(coordinate, instr->coord_components, 
>>> sampler_reg);
>>> -  } else {
>>> - mcs = fs_reg(0u);
>>> -  }
>>> -   }
> 
> Looking at the code in both back-ends, I think that if you made the
> vec4 do it like FS, you could just change the above condition to
> "instr->op == nir_texop_txf_ms || instr->op ==
> nir_texop_samples_identical" and you wouldn't need the extra dummy
> source.  Am I missing something?

That is probably true.  I started with vec4, and that's why things ended
up this way.  I'll try the other way and report back.

>>> -
>>> for (unsigned i = 0; i < 3; i++) {
>>>if (instr->const_offset[i] != 0) {
>>>   assert(offset_components == 0);
>>>
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 8/8] i965: Enable EXT_shader_samples_identical

2015-11-18 Thread Ian Romanick

On 11/18/2015 04:06 PM, Kenneth Graunke wrote:
> On Wednesday, November 18, 2015 03:46:54 PM Ian Romanick wrote:
>> From: Ian Romanick 
>>
>> Signed-off-by: Ian Romanick 
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   |  1 +
>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 16 
>>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp |  1 +
>>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 +++
>>  src/mesa/drivers/dri/i965/intel_extensions.c   |  1 +
>>  5 files changed, 30 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index 1f71f66..4af1234 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -2550,6 +2550,7 @@ fs_visitor::nir_emit_texture(const fs_builder , 
>> nir_tex_instr *instr)
>>   switch (instr->op) {
>>   case nir_texop_txf:
>>   case nir_texop_txf_ms:
>> + case nir_texop_samples_identical:
>>  coordinate = retype(src, BRW_REGISTER_TYPE_D);
>>  break;
>>   default:
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
>> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> index a7bd9ce..6688f6a 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> @@ -259,6 +259,22 @@ fs_visitor::emit_texture(ir_texture_opcode op,
>>lod = fs_reg(0u);
>> }
>>  
>> +   if (op == ir_samples_identical) {
>> +  fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 1, 
>> 1));
>> +
>> +  if (mcs.file == BRW_IMMEDIATE_VALUE) {
>> + fs_reg tmp = vgrf(glsl_type::uint_type);
>> +
>> + bld.MOV(tmp, mcs);
>> + bld.CMP(dst, tmp, src_reg(0u), BRW_CONDITIONAL_EQ);
> 
> Seems a little strange to emit assembly to do the comparison when
> you've already determined that the value is a compile time constant.
> 
> Why not just:
> 
>bld.MOV(dst, fs_reg(mcs.ud == 0u ? ~0u : 0u));

Mostly because I didn't realize that the constant value was so
trivially available... and I was lazy. :)  I can change this too.

I'm finding it difficult to hit this path without modifying the driver
to not use MCS.  Suggestions?

>> +  } else {
>> + bld.CMP(dst, mcs, src_reg(0u), BRW_CONDITIONAL_EQ);
>> +  }
>> +
>> +  this->result = dst;
>> +  return;
>> +   }
>> +
>> if (coordinate.file != BAD_FILE) {
>>/* FINISHME: Texture coordinate rescaling doesn't work with 
>> non-constant
>> * samplers.  This should only be a problem with GL_CLAMP on Gen7.
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
>> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> index 3c2674d..41c3c10 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> @@ -1615,6 +1615,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
>>   switch (instr->op) {
>>   case nir_texop_txf:
>>   case nir_texop_txf_ms:
>> + case nir_texop_samples_identical:
>>  coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D,
>>   src_size);
>>  coord_type = glsl_type::ivec(src_size);
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
>> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> index fda3d7c..2190a86 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> @@ -909,6 +909,17 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
>>unreachable("TXB is not valid for vertex shaders.");
>> case ir_lod:
>>unreachable("LOD is not valid for vertex shaders.");
>> +   case ir_samples_identical: {
>> +  if (mcs.file == BRW_IMMEDIATE_VALUE) {
>> + const src_reg temp = src_reg(this, glsl_type::uint_type);
>> +
>> + emit(MOV(dst_reg(temp), mcs));
>> + emit(CMP(dest, temp, src_reg(0u), BRW_CONDITIONAL_EQ));
> 
> Ditto.
> 
>bld.MOV(dst, src_reg(mcs.ud == 0u ? ~0u : 0u));
> 
>> +  } else {
>> + emit(CMP(dest, mcs, src_reg(0u), BRW_CONDITIONAL_EQ));
>> +  }
>> +  return;
>> +   }
>> default:
>>unreachable("Unrecognized tex op");
>> }
>> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
>> b/src/mesa/drivers/dri/i965/intel_extensions.c
>> index 386b63c..2e2459c 100644
>> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
>> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
>> @@ -333,6 +333,7 @@ intelInitExtensions(struct gl_context *ctx)
>>ctx->Extensions.ARB_texture_compression_bptc = true;
>>ctx->Extensions.ARB_texture_view = true;
>>ctx->Extensions.ARB_shader_storage_buffer_object = true;
>> +  ctx->Extensions.EXT_shader_samples_identical = true;
>>  
>>if

Re: [Mesa-dev] [PATCH 8/8] i965: Enable EXT_shader_samples_identical

2015-11-18 Thread Ian Romanick

On 11/18/2015 05:02 PM, Jason Ekstrand wrote:
> On Wed, Nov 18, 2015 at 4:06 PM, Kenneth Graunke  
> wrote:
>> On Wednesday, November 18, 2015 03:46:54 PM Ian Romanick wrote:
>>> From: Ian Romanick 
>>>
>>> Signed-off-by: Ian Romanick 
>>> ---
>>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   |  1 +
>>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 16 
>>>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp |  1 +
>>>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 +++
>>>  src/mesa/drivers/dri/i965/intel_extensions.c   |  1 +
>>>  5 files changed, 30 insertions(+)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
>>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> index 1f71f66..4af1234 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> @@ -2550,6 +2550,7 @@ fs_visitor::nir_emit_texture(const fs_builder , 
>>> nir_tex_instr *instr)
>>>   switch (instr->op) {
>>>   case nir_texop_txf:
>>>   case nir_texop_txf_ms:
>>> + case nir_texop_samples_identical:
>>>  coordinate = retype(src, BRW_REGISTER_TYPE_D);
>>>  break;
>>>   default:
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
>>> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>> index a7bd9ce..6688f6a 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>> @@ -259,6 +259,22 @@ fs_visitor::emit_texture(ir_texture_opcode op,
>>>lod = fs_reg(0u);
>>> }
>>>
>>> +   if (op == ir_samples_identical) {
>>> +  fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 1, 
>>> 1));
>>> +
>>> +  if (mcs.file == BRW_IMMEDIATE_VALUE) {
>>> + fs_reg tmp = vgrf(glsl_type::uint_type);
>>> +
>>> + bld.MOV(tmp, mcs);
>>> + bld.CMP(dst, tmp, src_reg(0u), BRW_CONDITIONAL_EQ);
>>
>> Seems a little strange to emit assembly to do the comparison when
>> you've already determined that the value is a compile time constant.
>>
>> Why not just:
>>
>>bld.MOV(dst, fs_reg(mcs.ud == 0u ? ~0u : 0u));
> 
> Actually, getting an immediate here means we don't have an MCS and we
> have no idea if the samples are identical, so we should return false
> always.

Derp.  Yeah, that's true.

>>> +  } else {
>>> + bld.CMP(dst, mcs, src_reg(0u), BRW_CONDITIONAL_EQ);
> 
> We should also consider handling the clear color case.  In this case,
> we'll get 0xff for 2x and 0x for 4x or 8x.  Do we know the
> number of samples in the shader?  We should be able to get that from
> the sampler or something but then we would have to pass that through
> the key and that would get gross.

Does that only apply to clear colors that are compatible with
fast-clear?  In my simple test, it appears that the cleared area returns
all zeros.

> One other thought, 16x MSAA will break all this because it gives you an
> ivec4 value from the MCS (if I remember correctly).  Not sure if we've
> landed 16x MSAA yet though.

Oof.  I think it has, but I don't think I have a 16x-compatible
platform.  I guess we could always return false for now if the sampler
is 16x.

> --Jason
> 
>>> +  }
>>> +
>>> +  this->result = dst;
>>> +  return;
>>> +   }
>>> +
>>> if (coordinate.file != BAD_FILE) {
>>>/* FINISHME: Texture coordinate rescaling doesn't work with 
>>> non-constant
>>> * samplers.  This should only be a problem with GL_CLAMP on Gen7.
>>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
>>> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>>> index 3c2674d..41c3c10 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>>> @@ -1615,6 +1615,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
>>>   switch (instr->op) {
>>>   case nir_texop_txf:
>>>   case nir_texop_txf_ms:
>>> + case nir_texop_samples_identical:
>>>  coordinate = get_nir_src(instr->src[i].src, 
>>> BRW_REGISTER_TYPE_D,
>>>   src_size);
>>>  coord_type = glsl_type::ivec(src_size);
>>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
>>> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>>> index fda3d7c..2190a86 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>>> @@ -909,6 +909,17 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
>>>unreachable("TXB is not valid for vertex shaders.");
>>> case ir_lod:
>>>unreachable("LOD is not valid for vertex shaders.");
>>> +   case ir_samples_identical: {
>>> +  if (mcs.file == BRW_IMMEDIATE_VALUE) {
>>> + const src_reg temp = src_reg(this, glsl_type::uint_type);
>>> +
>>> + emit(MOV(dst_reg(temp), mcs));
>>> +

Re: [Mesa-dev] [PATCH 1/2] mesa/extensions: Sort the extension table alphabetically

2015-11-18 Thread Nanley Chery

On Wed, Nov 18, 2015 at 3:47 PM, Matt Turner  wrote:
>
> On Wed, Nov 18, 2015 at 3:01 PM, Nanley Chery 
wrote:
> > From: Nanley Chery 
> >
> > Make it easier to determine where to add new extensions.
> > Performed with the vim sort command.
>
> Well, I think I'm obligated to review such a patch :)
>

Glad to get your review. It was inspired by your earlier review feedback
after all.

>
> > Signed-off-by: Nanley Chery 
> > ---
> >  src/mesa/main/extensions_table.h | 173
+--
> >  1 file changed, 92 insertions(+), 81 deletions(-)
> >
> > diff --git a/src/mesa/main/extensions_table.h
b/src/mesa/main/extensions_table.h
> > index d12fd9f..4b42863 100644
> > --- a/src/mesa/main/extensions_table.h
> > +++ b/src/mesa/main/extensions_table.h
> > @@ -1,8 +1,30 @@
> > +/* The extension table is alphabetically sorted by the extension name
string column. */
> > +
> >  #define GLL 0
> >  #define GLC 0
> >  #define ES1 0
> >  #define ES2 0
> >  #define  x ~0
>
> I'd probably put a blank line between the last #define and the first
EXT(...)
>

Sure, I'll include one.

>
> I trust sort, so
>
> Reviewed-by: Matt Turner 
>
>
> > +EXT(3DFX_texture_compression_FXT1   ,
TDFX_texture_compression_FXT1  , GLL, GLC,  x ,  x , 1999)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 7/8] i965/fs: Handle nir_tex_src_ms_index more like the vec4

2015-11-18 Thread Kenneth Graunke

On Wednesday, November 18, 2015 05:20:29 PM Ian Romanick wrote:
> On 11/18/2015 04:07 PM, Kenneth Graunke wrote:
> > On Wednesday, November 18, 2015 03:46:53 PM Ian Romanick wrote:
> >> From: Ian Romanick 
> >>
> >> Signed-off-by: Ian Romanick 
> >> ---
> >>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 +++-
> >>  1 file changed, 7 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> >> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> >> index cdd470b..1f71f66 100644
> >> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> >> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> >> @@ -2579,6 +2579,13 @@ fs_visitor::nir_emit_texture(const fs_builder , 
> >> nir_tex_instr *instr)
> >>   break;
> >>case nir_tex_src_ms_index:
> >>   sample_index = retype(src, BRW_REGISTER_TYPE_UD);
> >> + if (devinfo->gen >= 7 &&
> >> + key_tex->compressed_multisample_layout_mask & (1 << 
> >> sampler)) {
> >> +mcs = emit_mcs_fetch(coordinate, instr->coord_components, 
> >> sampler_reg);
> >> + } else {
> >> +mcs = fs_reg(0u);
> >> + }
> >> + mcs = retype(mcs, BRW_REGISTER_TYPE_UD);
> > 
> > No need for this line - emit_mcs_fetch already returns a UD register,
> > and fs_reg(0u) is already UD as well.
> 
> Is the similar line in vec4 also spurious?  I was just mimicking that.

Yeah, it looks pretty spurious to me.

--Ken


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeon/llvm: Use llvm.AMDIL.exp intrinsic again for now

2015-11-18 Thread Michel Dänzer

From: Michel Dänzer 

llvm.exp2.f32 doesn't work in some cases yet.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92709
Signed-off-by: Michel Dänzer 
---

Once the problem is fixed in the LLVM AMDGPU backend, we can re-enable
llvm.exp2.f32 for the fixed LLVM version.

 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index ac99e73..c94f109 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1539,7 +1539,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context 
* ctx)
bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
-   bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32";
+   bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
-- 
2.6.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] mesa/extensions: Sort the extension table alphabetically

2015-11-18 Thread Kenneth Graunke

On Wednesday, November 18, 2015 06:25:45 PM Ian Romanick wrote:
> On 11/18/2015 06:15 PM, Matt Turner wrote:
> > On Wed, Nov 18, 2015 at 6:07 PM, Ian Romanick  wrote:
> >> On 11/18/2015 03:01 PM, Nanley Chery wrote:
> >>> From: Nanley Chery 
> >>>
> >>> Make it easier to determine where to add new extensions.
> >>> Performed with the vim sort command.
> >>
> >> Uh... no, please.  Extensions should be sorted in each group, but what
> >> is wrong with the old group ordering?  ARB, KHR, OES, EXT, then vendors.
> > 
> > There's nothing wrong with it other than it's not possible to ensure
> > people get it right (see this patch). By actually alphabetizing the
> > list, we can programmatically check that things are ordered properly
> > (see patch 2/2).
> 
> Well... you can, it's just more work.
> 
> > I don't see a problem with reordering the groups... that doesn't make
> > anything harder.
> 
> Most of the time when I'm looking at things in the extension list, I'm
> trying to look at things by group.  Most of the groups stay together
> when the list is totally alphabetized, so that doesn't hurt.  What I
> think will be annoying is having all of the vendor extensions scattered
> about.
> 
> I guess we can try it like this.  I reserve the right to submit patches
> that reorder the groups and update the test if I find it too annoying. :)

Yeah, I was going to say the same thing - it's kind of nice having
ARB (official), OES (official ES), EXT (multi-vendor/semi-official),
and vendor extensions organized.

But, "alphabetize!" is simple and easy to explain, and since it still
keeps all the ARB/OES/EXT together, it's pretty reasonable even if you
like the groupings.  So I'm fine with it too.  I like the idea of having
a test that enforces it.

--Ken

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 8/8] i965: Enable EXT_shader_samples_identical

2015-11-18 Thread Jason Ekstrand

On Wed, Nov 18, 2015 at 4:06 PM, Kenneth Graunke  wrote:
> On Wednesday, November 18, 2015 03:46:54 PM Ian Romanick wrote:
>> From: Ian Romanick 
>>
>> Signed-off-by: Ian Romanick 
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   |  1 +
>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 16 
>>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp |  1 +
>>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 +++
>>  src/mesa/drivers/dri/i965/intel_extensions.c   |  1 +
>>  5 files changed, 30 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index 1f71f66..4af1234 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -2550,6 +2550,7 @@ fs_visitor::nir_emit_texture(const fs_builder , 
>> nir_tex_instr *instr)
>>   switch (instr->op) {
>>   case nir_texop_txf:
>>   case nir_texop_txf_ms:
>> + case nir_texop_samples_identical:
>>  coordinate = retype(src, BRW_REGISTER_TYPE_D);
>>  break;
>>   default:
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
>> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> index a7bd9ce..6688f6a 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> @@ -259,6 +259,22 @@ fs_visitor::emit_texture(ir_texture_opcode op,
>>lod = fs_reg(0u);
>> }
>>
>> +   if (op == ir_samples_identical) {
>> +  fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 1, 
>> 1));
>> +
>> +  if (mcs.file == BRW_IMMEDIATE_VALUE) {
>> + fs_reg tmp = vgrf(glsl_type::uint_type);
>> +
>> + bld.MOV(tmp, mcs);
>> + bld.CMP(dst, tmp, src_reg(0u), BRW_CONDITIONAL_EQ);
>
> Seems a little strange to emit assembly to do the comparison when
> you've already determined that the value is a compile time constant.
>
> Why not just:
>
>bld.MOV(dst, fs_reg(mcs.ud == 0u ? ~0u : 0u));

Actually, getting an immediate here means we don't have an MCS and we
have no idea if the samples are identical, so we should return false
always.

>> +  } else {
>> + bld.CMP(dst, mcs, src_reg(0u), BRW_CONDITIONAL_EQ);

We should also consider handling the clear color case.  In this case,
we'll get 0xff for 2x and 0x for 4x or 8x.  Do we know the
number of samples in the shader?  We should be able to get that from
the sampler or something but then we would have to pass that through
the key and that would get gross.

One other thought, 16x MSAA will break all this because it gives you an
ivec4 value from the MCS (if I remember correctly).  Not sure if we've
landed 16x MSAA yet though.
--Jason

>> +  }
>> +
>> +  this->result = dst;
>> +  return;
>> +   }
>> +
>> if (coordinate.file != BAD_FILE) {
>>/* FINISHME: Texture coordinate rescaling doesn't work with 
>> non-constant
>> * samplers.  This should only be a problem with GL_CLAMP on Gen7.
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
>> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> index 3c2674d..41c3c10 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> @@ -1615,6 +1615,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
>>   switch (instr->op) {
>>   case nir_texop_txf:
>>   case nir_texop_txf_ms:
>> + case nir_texop_samples_identical:
>>  coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D,
>>   src_size);
>>  coord_type = glsl_type::ivec(src_size);
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
>> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> index fda3d7c..2190a86 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> @@ -909,6 +909,17 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
>>unreachable("TXB is not valid for vertex shaders.");
>> case ir_lod:
>>unreachable("LOD is not valid for vertex shaders.");
>> +   case ir_samples_identical: {
>> +  if (mcs.file == BRW_IMMEDIATE_VALUE) {
>> + const src_reg temp = src_reg(this, glsl_type::uint_type);
>> +
>> + emit(MOV(dst_reg(temp), mcs));
>> + emit(CMP(dest, temp, src_reg(0u), BRW_CONDITIONAL_EQ));
>
> Ditto.
>
>bld.MOV(dst, src_reg(mcs.ud == 0u ? ~0u : 0u));
>
>> +  } else {
>> + emit(CMP(dest, mcs, src_reg(0u), BRW_CONDITIONAL_EQ));
>> +  }
>> +  return;
>> +   }
>> default:
>>unreachable("Unrecognized tex op");
>> }
>> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
>> b/src/mesa/drivers/dri/i965/intel_extensions.c
>> index 386b63c..2e2459c 100644
>> ---

Re: [Mesa-dev] [PATCH] [v2] i965: Add lossless compression to surface format table

2015-11-18 Thread Ben Widawsky

On Wed, Nov 18, 2015 at 03:50:32PM -0800, Ben Widawsky wrote:
> On Wed, Nov 18, 2015 at 11:10:12AM +0200, Pohjolainen, Topi wrote:
> > On Tue, Nov 17, 2015 at 05:30:06PM -0800, Ben Widawsky wrote:
> > > Background: Prior to Skylake and since Ivybridge Intel hardware has had 
> > > the
> > > ability to use a MCS (Multisample Control Surface) as auxiliary data in
> > > "compression" operations on the surface. This reduces memory bandwidth.  
> > > This
> > > hardware was either used for MSAA compression, and fast clear operations. 
> > >  On
> > 
> > This says:
> > 
> > ... either ... , and ... 
> > 
> > should it have been
> > 
> > ... either ... or ... 
> > 
> 
> The latter, thanks.
> 
> > 
> > All in all, I really appreciate the thorough explanation here in this 
> > commit,
> > just had to check. I know I'm late with my comments, so bare with me.
> > 
> > > Gen8, a similar mechanism exists to allow the hiz buffer to be sampled 
> > > from, and
> > > therefore this feature is sometimes referred to more generally as "AUX 
> > > buffers".
> > > 
> > > Skylake adds the ability to have the display engine directly source 
> > > compressed
> > > surfaces on top of the ability to sample from them. Inference dictates 
> > > that
> > > enabling this display features adding a restriction to the formats which 
> > > could
> > 
> > s/adding/adds/ ?
> 
> Yes.

I've added a spec reference here since I didn't spot it before. Just FYI since
you gave me the r-b already:

'Skylake adds the ability to have the display engine directly source compressed
surfaces on top of the ability to sample from them. Inference dictates that
enabling this display feature adds a restriction to the formats which could
actually be compressed. This is backed up by a blurb in the AUX_CCS_D section
from the RENDER_SURFACE_STATE: "In addition, if the surface is bound to the
sampling engine, Surface Format must be supported for Render Target Compression
for surfaces bound to the sampling engine."' ...

> 
> > 
> > > actually be compressed. The current set of surfaces seems to be a subset 
> > > as
> > > compared to previous gens (see the next patch). Also, if I had to guess I 
> > > would
> > > guess that future gens add support for more surface formats. To make 
> > > handling
> > > this a bit easier to read, and more future proof, the support for this is 
> > > moved
> > > into the surface formats table.
> > > 

> > > Along with the modifications to the table, a helper function is also 
> > > provided to
> > > determine if a surface is CCS compatible.  Because fast clears are 
> > > currently
> > > disabled on SKL, we can plumb the helper all the way through here, and not
> > > actually have anything break.
> > > 
> > > The logic in the table works a bit differently than the other columns in 
> > > the
> > > table and therefore deserves a small mention. For most other features, 
> > > the GEN
> > 
> > I have difficulty here also: the sentence compares table to other columns in
> > the table ("... logic in the table ... than other columns...").
> > 
> > Did you mean to say that a particular _column_ in the table behaves
> > differently than the others?
> 
> Yeah, I need to fix this. It's not actually true with the last change that 
> Chad
> requested me to make. It has no distinction from other columns in the table 
> now.
> 
> > 
> > > which began implementing it is set, and it is assumed future gens also 
> > > support
> > > this. For this feature, GEN9 actually eliminates support for certain 
> > > formats. We
> > > could use this column to determine support for the similar feature on 
> > > older
> > 
> > And here you refer to the newly added column? Comparing the contents of that
> > column to the supported render targets (column RT) gives you the delta 
> > between
> > gen9 and older?
> > 
> > > generation hardware. Aside from that being an error prone task which is
> > > unrelated to enabling this on GEN9, it becomes somewhat tricky to 
> > > implement
> > > because of the fact that surface format support diminishes. You'd 
> > > probably want
> > > another column to cleanly implement it.
> > 
> > And this is what you did, right?
> 
> No. What I meant by this is you could have a field which I'll call lossless
> compression (there is debate about whether that's accurate, but to me it's a
> good name) and it could apply for all generations which support it. For 
> example,
> RGBA8 would be 70 (because it has supported this since gen7) and L16_UNORM 
> would
> be x (because it's not 4, 8, or 16 cpp).
> 
> It turns out that doesn't work so easily because there are formats which work 
> on
> gen7, 7.5 and 8, but do not work on gen9. The table doesn't have range support
> built in, so you'd need two columns to do it right, the legacy compression, 
> and
> the newer version of the compression.
> 
> I'll be dropping this whole part of the commit message to address the last
> change in the patch, since it is causing confusion.
> 
> > 
> >

Re: [Mesa-dev] [PATCH 7/8] i965/fs: Handle nir_tex_src_ms_index more like the vec4

2015-11-18 Thread Ian Romanick

On 11/18/2015 04:07 PM, Kenneth Graunke wrote:
> On Wednesday, November 18, 2015 03:46:53 PM Ian Romanick wrote:
>> From: Ian Romanick 
>>
>> Signed-off-by: Ian Romanick 
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 +++-
>>  1 file changed, 7 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index cdd470b..1f71f66 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -2579,6 +2579,13 @@ fs_visitor::nir_emit_texture(const fs_builder , 
>> nir_tex_instr *instr)
>>   break;
>>case nir_tex_src_ms_index:
>>   sample_index = retype(src, BRW_REGISTER_TYPE_UD);
>> + if (devinfo->gen >= 7 &&
>> + key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
>> +mcs = emit_mcs_fetch(coordinate, instr->coord_components, 
>> sampler_reg);
>> + } else {
>> +mcs = fs_reg(0u);
>> + }
>> + mcs = retype(mcs, BRW_REGISTER_TYPE_UD);
> 
> No need for this line - emit_mcs_fetch already returns a UD register,
> and fs_reg(0u) is already UD as well.

Is the similar line in vec4 also spurious?  I was just mimicking that.

>>   break;
>>case nir_tex_src_offset:
>>   tex_offset = retype(src, BRW_REGISTER_TYPE_D);
>> @@ -2612,15 +2619,6 @@ fs_visitor::nir_emit_texture(const fs_builder , 
>> nir_tex_instr *instr)
>>}
>> }
>>  
>> -   if (instr->op == nir_texop_txf_ms) {
>> -  if (devinfo->gen >= 7 &&
>> -  key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
>> - mcs = emit_mcs_fetch(coordinate, instr->coord_components, 
>> sampler_reg);
>> -  } else {
>> - mcs = fs_reg(0u);
>> -  }
>> -   }
>> -
>> for (unsigned i = 0; i < 3; i++) {
>>if (instr->const_offset[i] != 0) {
>>   assert(offset_components == 0);
>>




signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] mesa/extensions: Sort the extension table alphabetically

2015-11-18 Thread Ian Romanick

On 11/18/2015 06:15 PM, Matt Turner wrote:
> On Wed, Nov 18, 2015 at 6:07 PM, Ian Romanick  wrote:
>> On 11/18/2015 03:01 PM, Nanley Chery wrote:
>>> From: Nanley Chery 
>>>
>>> Make it easier to determine where to add new extensions.
>>> Performed with the vim sort command.
>>
>> Uh... no, please.  Extensions should be sorted in each group, but what
>> is wrong with the old group ordering?  ARB, KHR, OES, EXT, then vendors.
> 
> There's nothing wrong with it other than it's not possible to ensure
> people get it right (see this patch). By actually alphabetizing the
> list, we can programmatically check that things are ordered properly
> (see patch 2/2).

Well... you can, it's just more work.

> I don't see a problem with reordering the groups... that doesn't make
> anything harder.

Most of the time when I'm looking at things in the extension list, I'm
trying to look at things by group.  Most of the groups stay together
when the list is totally alphabetized, so that doesn't hurt.  What I
think will be annoying is having all of the vendor extensions scattered
about.

I guess we can try it like this.  I reserve the right to submit patches
that reorder the groups and update the test if I find it too annoying. :)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/8] nir: Add nir_texop_samples_identical opcode

2015-11-18 Thread Jason Ekstrand

On Wed, Nov 18, 2015 at 4:23 PM, Kenneth Graunke  wrote:
> On Wednesday, November 18, 2015 03:46:52 PM Ian Romanick wrote:
>> From: Ian Romanick 
>>
>> This is the NIR analog to GLSL IR ir_samples_identical.  However, the
>> NIR has an extra source.  This is a fake sample index with a type
>> nir_tex_src_ms_index.  This enables backends to (likely) share more code
>> with the existing nir_texop_txf_ms implementation.
>>
>> Signed-off-by: Ian Romanick 
>
> The extra source is a little weird, but I see why you did it - it makes
> it really easy to fall through to the existing code.  Seems okay.
> Fortunately, none of this is baked in stone, so we can always change it
> later if we really want :)

I'm not a huge fan of it either.  I'm not going to NAK anything based on it,
but it does seem a bit awkward.
--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 7/8] i965/fs: Handle nir_tex_src_ms_index more like the vec4

2015-11-18 Thread Jason Ekstrand

On Wed, Nov 18, 2015 at 4:07 PM, Kenneth Graunke  wrote:
> On Wednesday, November 18, 2015 03:46:53 PM Ian Romanick wrote:
>> From: Ian Romanick 
>>
>> Signed-off-by: Ian Romanick 
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 +++-
>>  1 file changed, 7 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index cdd470b..1f71f66 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -2579,6 +2579,13 @@ fs_visitor::nir_emit_texture(const fs_builder , 
>> nir_tex_instr *instr)
>>   break;
>>case nir_tex_src_ms_index:
>>   sample_index = retype(src, BRW_REGISTER_TYPE_UD);
>> + if (devinfo->gen >= 7 &&
>> + key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
>> +mcs = emit_mcs_fetch(coordinate, instr->coord_components, 
>> sampler_reg);
>> + } else {
>> +mcs = fs_reg(0u);
>> + }
>> + mcs = retype(mcs, BRW_REGISTER_TYPE_UD);
>
> No need for this line - emit_mcs_fetch already returns a UD register,
> and fs_reg(0u) is already UD as well.
>
>>   break;
>>case nir_tex_src_offset:
>>   tex_offset = retype(src, BRW_REGISTER_TYPE_D);
>> @@ -2612,15 +2619,6 @@ fs_visitor::nir_emit_texture(const fs_builder , 
>> nir_tex_instr *instr)
>>}
>> }
>>
>> -   if (instr->op == nir_texop_txf_ms) {
>> -  if (devinfo->gen >= 7 &&
>> -  key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
>> - mcs = emit_mcs_fetch(coordinate, instr->coord_components, 
>> sampler_reg);
>> -  } else {
>> - mcs = fs_reg(0u);
>> -  }
>> -   }

Looking at the code in both back-ends, I think that if you made the
vec4 do it like FS, you could just change the above condition to
"instr->op == nir_texop_txf_ms || instr->op ==
nir_texop_samples_identical" and you wouldn't need the extra dummy
source.  Am I missing something?

>> -
>> for (unsigned i = 0; i < 3; i++) {
>>if (instr->const_offset[i] != 0) {
>>   assert(offset_components == 0);
>>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] mesa/extensions: Sort the extension table alphabetically

2015-11-18 Thread Ian Romanick

On 11/18/2015 03:01 PM, Nanley Chery wrote:
> From: Nanley Chery 
> 
> Make it easier to determine where to add new extensions.
> Performed with the vim sort command.

Uh... no, please.  Extensions should be sorted in each group, but what
is wrong with the old group ordering?  ARB, KHR, OES, EXT, then vendors.

> 
> Signed-off-by: Nanley Chery 
> ---
>  src/mesa/main/extensions_table.h | 173 
> +--
>  1 file changed, 92 insertions(+), 81 deletions(-)
> 
> diff --git a/src/mesa/main/extensions_table.h 
> b/src/mesa/main/extensions_table.h
> index d12fd9f..4b42863 100644
> --- a/src/mesa/main/extensions_table.h
> +++ b/src/mesa/main/extensions_table.h
> @@ -1,8 +1,30 @@
> +/* The extension table is alphabetically sorted by the extension name string 
> column. */
> +
>  #define GLL 0
>  #define GLC 0
>  #define ES1 0
>  #define ES2 0
>  #define  x ~0
> +EXT(3DFX_texture_compression_FXT1   , TDFX_texture_compression_FXT1  
> , GLL, GLC,  x ,  x , 1999)
> +
> +EXT(AMD_conservative_depth  , ARB_conservative_depth 
> , GLL, GLC,  x ,  x , 2009)
> +EXT(AMD_draw_buffers_blend  , ARB_draw_buffers_blend 
> , GLL, GLC,  x ,  x , 2009)
> +EXT(AMD_performance_monitor , AMD_performance_monitor
> , GLL, GLC,  x ,  x , 2007)
> +EXT(AMD_pinned_memory   , AMD_pinned_memory  
> , GLL, GLC,  x ,  x , 2013)
> +EXT(AMD_seamless_cubemap_per_texture, 
> AMD_seamless_cubemap_per_texture   , GLL, GLC,  x ,  x , 2009)
> +EXT(AMD_shader_stencil_export   , ARB_shader_stencil_export  
> , GLL, GLC,  x ,  x , 2009)
> +EXT(AMD_shader_trinary_minmax   , dummy_true 
> , GLL, GLC,  x ,  x , 2012)
> +EXT(AMD_vertex_shader_layer , AMD_vertex_shader_layer
> ,  x , GLC,  x ,  x , 2012)
> +EXT(AMD_vertex_shader_viewport_index, 
> AMD_vertex_shader_viewport_index   ,  x , GLC,  x ,  x , 2012)
> +
> +EXT(ANGLE_texture_compression_dxt3  , ANGLE_texture_compression_dxt  
> , GLL, GLC, ES1, ES2, 2011)
> +EXT(ANGLE_texture_compression_dxt5  , ANGLE_texture_compression_dxt  
> , GLL, GLC, ES1, ES2, 2011)
> +
> +EXT(APPLE_object_purgeable  , APPLE_object_purgeable 
> , GLL, GLC,  x ,  x , 2006)
> +EXT(APPLE_packed_pixels , dummy_true 
> , GLL,  x ,  x ,  x , 2002)
> +EXT(APPLE_texture_max_level , dummy_true 
> ,  x ,  x , ES1, ES2, 2009)
> +EXT(APPLE_vertex_array_object   , dummy_true 
> , GLL,  x ,  x ,  x , 2002)
> +
>  EXT(ARB_ES2_compatibility   , ARB_ES2_compatibility  
> , GLL, GLC,  x ,  x , 2009)
>  EXT(ARB_ES3_compatibility   , ARB_ES3_compatibility  
> , GLL, GLC,  x ,  x , 2012)
>  EXT(ARB_arrays_of_arrays, ARB_arrays_of_arrays   
> , GLL, GLC,  x ,  x , 2012)
> @@ -16,9 +38,9 @@ EXT(ARB_color_buffer_float  , 
> ARB_color_buffer_float
>  EXT(ARB_compressed_texture_pixel_storage, dummy_true 
> , GLL, GLC,  x ,  x , 2011)
>  EXT(ARB_compute_shader  , ARB_compute_shader 
> , GLL, GLC,  x ,  x , 2012)
>  EXT(ARB_conditional_render_inverted , 
> ARB_conditional_render_inverted, GLL, GLC,  x ,  x , 2014)
> +EXT(ARB_conservative_depth  , ARB_conservative_depth 
> , GLL, GLC,  x ,  x , 2011)
>  EXT(ARB_copy_buffer , dummy_true 
> , GLL, GLC,  x ,  x , 2008)
>  EXT(ARB_copy_image  , ARB_copy_image 
> , GLL, GLC,  x ,  x , 2012)
> -EXT(ARB_conservative_depth  , ARB_conservative_depth 
> , GLL, GLC,  x ,  x , 2011)
>  EXT(ARB_debug_output, dummy_true 
> , GLL, GLC,  x ,  x , 2009)
>  EXT(ARB_depth_buffer_float  , ARB_depth_buffer_float 
> , GLL, GLC,  x ,  x , 2008)
>  EXT(ARB_depth_clamp , ARB_depth_clamp
> , GLL, GLC,  x ,  x , 2003)
> @@ -56,8 +78,8 @@ EXT(ARB_multi_bind  , dummy_true
>  EXT(ARB_multi_draw_indirect , ARB_draw_indirect  
> ,  x , GLC,  x ,  x , 2012)
>  EXT(ARB_multisample , dummy_true 
> , GLL,  x ,  x ,  x , 1994)
>  EXT(ARB_multitexture, dummy_true 
> , GLL,  x ,  x ,  x , 1998)
> -EXT(ARB_occlusion_query2,

Re: [Mesa-dev] [PATCH] radeon/llvm: Use llvm.AMDIL.exp intrinsic again for now

2015-11-18 Thread Tom Stellard

On Thu, Nov 19, 2015 at 11:31:55AM +0900, Michel Dänzer wrote:
> From: Michel Dänzer 
> 
> llvm.exp2.f32 doesn't work in some cases yet.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92709
> Signed-off-by: Michel Dänzer 
> ---
> 
> Once the problem is fixed in the LLVM AMDGPU backend, we can re-enable
> llvm.exp2.f32 for the fixed LLVM version.
> 
>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index ac99e73..c94f109 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -1539,7 +1539,7 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
>   bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
>   bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
> - bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32";
> + bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";

Do we want a native instruction here, or do we want IEEE precise exp2?
If it's the former then we shouldn't be using llvm.exp2.f32 anyway.

I know that we need to use llvm.AMDIL.exp. for older LLVM, but for newer
LLVM, I would really like to start doing intrinsics the correct way.  In
this case, it means adding an llvm.amdgcn.exp.f32 intrinsic to
include/llvm/IR/IntrinsicsAMDGPU.td, in the section with the amdgcn
TargetPrefix.

-Tom
>   bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
>   bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
> -- 
> 2.6.2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] [v3] i965/skl: skip fast clears for certain surface formats

2015-11-18 Thread Ben Widawsky

On Wed, Nov 18, 2015 at 12:15:40PM +0200, Pohjolainen, Topi wrote:
> On Tue, Nov 17, 2015 at 05:31:12PM -0800, Ben Widawsky wrote:
> > Some of the information originally in this commit message is now in the 
> > patch
> > before this.
> > 
> > SKL adds compressible render targets and as a result mutates some of the
> > programming for fast clears and resolves. There is a new internal surface 
> > type
> > called the CCS. The old AUX_MCS bit becomes AUX_CCS_D. "The Auxiliary 
> > surface is
> > a CCS (Color Control Surface) with compression disabled or an MCS with
> > compression enabled, depending on number of multisamples. MCS (Multisample
> 
> I find this wording (even though it looks to be directly from the spec)
> somewhat misleading. I read it as suggesting that AUX can be CCS only when
> compression is disabled. I like another wording in "Auxiliary Surfaces For
> Sampled Tiled Resource" better: "CCS is used to indicate that the color
> surface is losslessly compressed."
> 
> Anyway the patch itself:
> 
> Reviewed-by: Topi Pohjolainen 
> 

Thanks Topi. I like this quote better as well.

[snip]

Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 8/8] i965: Enable EXT_shader_samples_identical

2015-11-18 Thread Jason Ekstrand

On Wed, Nov 18, 2015 at 5:23 PM, Ian Romanick  wrote:
> On 11/18/2015 04:06 PM, Kenneth Graunke wrote:
>> On Wednesday, November 18, 2015 03:46:54 PM Ian Romanick wrote:
>>> From: Ian Romanick 
>>>
>>> Signed-off-by: Ian Romanick 
>>> ---
>>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   |  1 +
>>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 16 
>>>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp |  1 +
>>>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 +++
>>>  src/mesa/drivers/dri/i965/intel_extensions.c   |  1 +
>>>  5 files changed, 30 insertions(+)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
>>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> index 1f71f66..4af1234 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> @@ -2550,6 +2550,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, 
>>> nir_tex_instr *instr)
>>>   switch (instr->op) {
>>>   case nir_texop_txf:
>>>   case nir_texop_txf_ms:
>>> + case nir_texop_samples_identical:
>>>  coordinate = retype(src, BRW_REGISTER_TYPE_D);
>>>  break;
>>>   default:
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
>>> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>> index a7bd9ce..6688f6a 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>> @@ -259,6 +259,22 @@ fs_visitor::emit_texture(ir_texture_opcode op,
>>>lod = fs_reg(0u);
>>> }
>>>
>>> +   if (op == ir_samples_identical) {
>>> +  fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 1, 
>>> 1));
>>> +
>>> +  if (mcs.file == BRW_IMMEDIATE_VALUE) {
>>> + fs_reg tmp = vgrf(glsl_type::uint_type);
>>> +
>>> + bld.MOV(tmp, mcs);
>>> + bld.CMP(dst, tmp, src_reg(0u), BRW_CONDITIONAL_EQ);
>>
>> Seems a little strange to emit assembly to do the comparison when
>> you've already determined that the value is a compile time constant.
>>
>> Why not just:
>>
>>bld.MOV(dst, fs_reg(mcs.ud == 0u ? ~0u : 0u));
>
> Mostly because I didn't realize that the constant value was so
> trivially available... and I was lazy. :)  I can change this too.
>
> I'm finding it difficult to hit this path without modifying the driver
> to not use MCS.  Suggestions?

Looking at intel_mipmap_tree.c:79, it seems that you can force it by
using an integer surface on Ivy Bridge or Haswell.

>>> +  } else {
>>> + bld.CMP(dst, mcs, src_reg(0u), BRW_CONDITIONAL_EQ);
>>> +  }
>>> +
>>> +  this->result = dst;
>>> +  return;
>>> +   }
>>> +
>>> if (coordinate.file != BAD_FILE) {
>>>/* FINISHME: Texture coordinate rescaling doesn't work with 
>>> non-constant
>>> * samplers.  This should only be a problem with GL_CLAMP on Gen7.
>>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
>>> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>>> index 3c2674d..41c3c10 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>>> @@ -1615,6 +1615,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
>>>   switch (instr->op) {
>>>   case nir_texop_txf:
>>>   case nir_texop_txf_ms:
>>> + case nir_texop_samples_identical:
>>>  coordinate = get_nir_src(instr->src[i].src, 
>>> BRW_REGISTER_TYPE_D,
>>>   src_size);
>>>  coord_type = glsl_type::ivec(src_size);
>>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
>>> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>>> index fda3d7c..2190a86 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>>> @@ -909,6 +909,17 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
>>>unreachable("TXB is not valid for vertex shaders.");
>>> case ir_lod:
>>>unreachable("LOD is not valid for vertex shaders.");
>>> +   case ir_samples_identical: {
>>> +  if (mcs.file == BRW_IMMEDIATE_VALUE) {
>>> + const src_reg temp = src_reg(this, glsl_type::uint_type);
>>> +
>>> + emit(MOV(dst_reg(temp), mcs));
>>> + emit(CMP(dest, temp, src_reg(0u), BRW_CONDITIONAL_EQ));
>>
>> Ditto.
>>
>>bld.MOV(dst, src_reg(mcs.ud == 0u ? ~0u : 0u));
>>
>>> +  } else {
>>> + emit(CMP(dest, mcs, src_reg(0u), BRW_CONDITIONAL_EQ));
>>> +  }
>>> +  return;
>>> +   }
>>> default:
>>>unreachable("Unrecognized tex op");
>>> }
>>> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
>>> b/src/mesa/drivers/dri/i965/intel_extensions.c
>>> index 386b63c..2e2459c 100644
>>> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
>>> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
>>> @@ -333,6 +333,7 @@

Re: [Mesa-dev] [PATCH 1/2] mesa/extensions: Sort the extension table alphabetically

2015-11-18 Thread Matt Turner

On Wed, Nov 18, 2015 at 6:07 PM, Ian Romanick  wrote:
> On 11/18/2015 03:01 PM, Nanley Chery wrote:
>> From: Nanley Chery 
>>
>> Make it easier to determine where to add new extensions.
>> Performed with the vim sort command.
>
> Uh... no, please.  Extensions should be sorted in each group, but what
> is wrong with the old group ordering?  ARB, KHR, OES, EXT, then vendors.

There's nothing wrong with it other than it's not possible to ensure
people get it right (see this patch). By actually alphabetizing the
list, we can programmatically check that things are ordered properly
(see patch 2/2).

I don't see a problem with reordering the groups... that doesn't make
anything harder.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa/extensions: Document the extensions table

2015-11-18 Thread Nanley Chery

On Wed, Nov 18, 2015 at 12:33 PM, Emil Velikov 
wrote:

> Hi Nanley,
>
> On 18 November 2015 at 20:29, Nanley Chery  wrote:
> > From: Nanley Chery 
> >
> > Help developers understand the table's organization.
> >
> > Suggested-by: Jason Ekstrand 
> > Signed-off-by: Nanley Chery 
> Can you bring back the comments that were in here, please? They might
> have caused issues with sed previously, but things should be fine now. Also
> move the existing FIXME as originally, as mentioned by Marek and
> myself.
>
>
There's one more organizational change that's possible - sorting the table
alphabetically. The original comments would probably be unnecessary if we
choose to perform that change.

When first adding ASTC to the table, I did not place the extensions in the
right rows partly because the existing alphabetical sorting was
inconsistent. By completely sorting the table it becomes easier to
determine where to place new extensions.

If the sorted order is checked with a gtest, it would then be possible to
know if the table is sorted and, if desired, perform a bsearch on the
table. I'll send out a patch to test interest on this. I have a wip branch
that does what I've described (minus the bsearch) :
http://cgit.freedesktop.org/~nchery/mesa/log/?h=ext_sort

With respect to the FIXME, I was confused by your original comment and thought that
you actually wanted the FIXME on the same line as the extension. Hence, my
comment about there being no unanimous opinion on the change. I wasn't sure
what action to take in the midst of the perceived disagreement, but now
that I understand your comment, I'll send out the patch for it.

Regards,
Nanley

> Thanks
> Emil
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] mesa: Add test for sorted extension table

2015-11-18 Thread Emil Velikov

On 19 November 2015 at 00:16, Nanley Chery  wrote:
> On Wed, Nov 18, 2015 at 4:08 PM, Emil Velikov 
> wrote:
>>
>> On 18 November 2015 at 23:40, Nanley Chery  wrote:
>> > On Wed, Nov 18, 2015 at 3:07 PM, Ilia Mirkin 
>> > wrote:
>> >>
>> >> Why have the count as an entry in the enum? Can't you just do
>> >> ARRAY_SIZE(_mesa_extension_table) ?
>> >>
>> >
>> > The compiler doesn't know the size of the array by just looking at the
>> > header. To use ARRAY_SIZE, I'd have to modify the declaration of
>> > _mesa_extension_table to specify a size of MESA_EXTENSION_COUNT.
>> >
>> Afaict there is no declaration, only a definition of the array. That
>> aside I'm not sure what makes you think that - we have dozens of cases
>> in mesa which use this approach.
>>
>
> The array is declared in extensions.h and defined in extensions.c.
>
> I compiled the test using the ARRAY_SIZE macro and got the following error:
> mesa_extensions.cpp:40:53: error: invalid application of ‘sizeof’ to
> incomplete type ‘const mesa_extension []’
>

>> >> > +TEST(MesaExtensionsTest, AlphabeticallySorted)
>> >> > +{
>> >> > +   for (int i = 0; i < MESA_EXTENSION_COUNT - 1; ++i) {
>> >> > +  const char * current_str = _mesa_extension_table[i].name;
>> >> > +  const char * next_str = _mesa_extension_table[i+1].name;
>> >> > +
>> >> > +  /* We expect the extension table to be alphabetically sorted
>> >> > */
>> >> > +  ASSERT_LT(strcmp(current_str, next_str), 0);
>> Am I daydreaming, or is something wrong here? On the last iteration
>> you'll end up comparing the final extension name against ... null ?
>>
>
> The last iteration compares the 2nd-to-last extension name against the last
> extension name.
>
Ouch you're spot-on for both things.

Thanks for calling me out, and pardon for the noise.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 1/2] mesa: Add KBL PCI IDs and platform information.

2015-11-18 Thread Ilia Mirkin

On Wed, Nov 18, 2015 at 4:54 PM, Sarah Sharp
 wrote:
>> There's not really a consensus I guess, but most people do leave the version
>> information in the final commit message.
>
> I personally feel like that's leaving boredom doodles on a final
> architectural drawing. If people want to know the back-and-forth
> history, the mailing list archive will always be there. So, no, I don't
> really want to leave version info in the commit message.

FWIW I wholeheartedly agree with this line of reasoning. I never put
the version info into my commits either, and find it
confusing/misleading when others do. I want to know the final state of
things when looking at the commit 1 year from now, not the 20-step
process and all the wrong turns to get there.

Cheers,

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] i965/fs: Add support for gl_HelperInvocation system value.

2015-11-18 Thread Matt Turner

On Wed, Nov 18, 2015 at 2:25 PM, Matt Turner  wrote:
> ---
> This fails... for reasons I cannot determine. Can anyone spot what's wrong?

Ilia identified the problem in 10 seconds -- I need to NOT the pixel
mask. A set bit in the mask means the channel is enabled, and that's
the opposite of what gl_HelperInvocation means.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] mesa/extensions: Sort the extension table alphabetically

2015-11-18 Thread Matt Turner

On Wed, Nov 18, 2015 at 3:01 PM, Nanley Chery  wrote:
> From: Nanley Chery 
>
> Make it easier to determine where to add new extensions.
> Performed with the vim sort command.

Well, I think I'm obligated to review such a patch :)

> Signed-off-by: Nanley Chery 
> ---
>  src/mesa/main/extensions_table.h | 173 
> +--
>  1 file changed, 92 insertions(+), 81 deletions(-)
>
> diff --git a/src/mesa/main/extensions_table.h 
> b/src/mesa/main/extensions_table.h
> index d12fd9f..4b42863 100644
> --- a/src/mesa/main/extensions_table.h
> +++ b/src/mesa/main/extensions_table.h
> @@ -1,8 +1,30 @@
> +/* The extension table is alphabetically sorted by the extension name string 
> column. */
> +
>  #define GLL 0
>  #define GLC 0
>  #define ES1 0
>  #define ES2 0
>  #define  x ~0

I'd probably put a blank line between the last #define and the first EXT(...)

I trust sort, so

Reviewed-by: Matt Turner 

> +EXT(3DFX_texture_compression_FXT1   , TDFX_texture_compression_FXT1  
> , GLL, GLC,  x ,  x , 1999)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/8] mesa: Extension tracking for EXT_shader_samples_identical

2015-11-18 Thread Ian Romanick

From: Ian Romanick 

Signed-off-by: Ian Romanick 
---
 src/mesa/main/extensions_table.h | 1 +
 src/mesa/main/mtypes.h   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 8685a89..87b6645 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -177,6 +177,7 @@ EXT(EXT_secondary_color , dummy_true
 EXT(EXT_separate_shader_objects , dummy_true   
  ,  x ,  x ,  x , ES2, 2013)
 EXT(EXT_separate_specular_color , dummy_true   
  , GLL,  x ,  x ,  x , 1997)
 EXT(EXT_shader_integer_mix  , EXT_shader_integer_mix   
  , GLL, GLC,  x ,  30, 2013)
+EXT(EXT_shader_samples_identical, EXT_shader_samples_identical 
  , GLL, GLC,  x ,  31, 2015)
 EXT(EXT_shadow_funcs, ARB_shadow   
  , GLL,  x ,  x ,  x , 2002)
 EXT(EXT_stencil_two_side, EXT_stencil_two_side 
  , GLL,  x ,  x ,  x , 2001)
 EXT(EXT_stencil_wrap, dummy_true   
  , GLL,  x ,  x ,  x , 2002)
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 4efdf1e..65276f9 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3761,6 +3761,7 @@ struct gl_extensions
GLboolean EXT_polygon_offset_clamp;
GLboolean EXT_provoking_vertex;
GLboolean EXT_shader_integer_mix;
+   GLboolean EXT_shader_samples_identical;
GLboolean EXT_stencil_two_side;
GLboolean EXT_texture_array;
GLboolean EXT_texture_compression_latc;
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/8] Implement EXT_shader_samples_identical

2015-11-18 Thread Ian Romanick

This patch series implements a new GL extension,
EXT_shader_samples_identical.  This extension allows shaders to
determine when all of the samples in a particular texel are the same.
This takes advantage of the way compressed multisample surfaces are
stored on modern Intel and AMD hardware.  This enables optimizations in
application multisample resolve filters, etc.

I really wanted to get this in the next Mesa release.  For some reason,
I thought the branch point was after Thanksgiving (which is next
Thursday).  Ken reminded me yesterday that the branch point is actually
this Friday. :( As a result, I'm sending it out today to get review as
soon as possible.

I also wanted to get as much time as possible for other drivers to get
implementations.  I worked with Graham Sellers on this extension, and he
assures me that the implementation on modern Radeons is trivial.  My
expectation is that it should be about the same as the Intel
implementation.

There will be some extra TGSI bits needed, but that should also be
trivial.  For the NIR and i965 backend bits, I mostly copied and blended
the implementations of txf_ms and query_samples.

There are currently only trivial piglit tests, but I am working on more.
I basically hacked up tests/spec/arb_texture_multisample/texelfetch.c to
use the extension to render different colors based on whether
textureSamplesIdenticalEXT returned true or false.  The resulting image
and the generated assembly look good.  My plan is to get a set of real
tests out by midday tomorrow.

As soon as we're confident that the spec is good, I'll submit it to
Khronos for publication in the registry.  I'm still waiting on feedback
from another closed-source driver writer.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/8] glsl: Extension tracking for EXT_shader_samples_identical

2015-11-18 Thread Ian Romanick

From: Ian Romanick 

Signed-off-by: Ian Romanick 
---
 src/glsl/glcpp/glcpp-parse.y| 3 +++
 src/glsl/glsl_parser_extras.cpp | 1 +
 src/glsl/glsl_parser_extras.h   | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 6aa7abe..8fbf404 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2510,6 +2510,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t 
*parser, intmax_t versio
if (extensions != NULL) {
   if (extensions->EXT_shader_integer_mix)
  add_builtin_define(parser, "GL_EXT_shader_integer_mix", 1);
+
+  if (extensions->EXT_shader_samples_identical)
+ add_builtin_define(parser, "GL_EXT_shader_samples_identical", 1);
}
 
if (version >= 150)
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 3ed1168..dcc6b83 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -647,6 +647,7 @@ static const _mesa_glsl_extension 
_mesa_glsl_supported_extensions[] = {
EXT(EXT_draw_buffers,   false,  true, dummy_true),
EXT(EXT_separate_shader_objects,false, true,  dummy_true),
EXT(EXT_shader_integer_mix, true,  true,  
EXT_shader_integer_mix),
+   EXT(EXT_shader_samples_identical,   true,  true,  
EXT_shader_samples_identical),
EXT(EXT_texture_array,  true,  false, EXT_texture_array),
 };
 
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 1d8c1b8..e76d16e 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -601,6 +601,8 @@ struct _mesa_glsl_parse_state {
bool EXT_separate_shader_objects_warn;
bool EXT_shader_integer_mix_enable;
bool EXT_shader_integer_mix_warn;
+   bool EXT_shader_samples_identical_enable;
+   bool EXT_shader_samples_identical_warn;
bool EXT_texture_array_enable;
bool EXT_texture_array_warn;
/*@}*/
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 6/8] nir: Add nir_texop_samples_identical opcode

2015-11-18 Thread Ian Romanick

From: Ian Romanick 

This is the NIR analog to GLSL IR ir_samples_identical.  However, the
NIR has an extra source.  This is a fake sample index with a type
nir_tex_src_ms_index.  This enables backends to (likely) share more code
with the existing nir_texop_txf_ms implementation.

Signed-off-by: Ian Romanick 
---
 src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c |  3 +++
 src/glsl/nir/glsl_to_nir.cpp | 11 +++
 src/glsl/nir/nir.h   |  4 
 src/glsl/nir/nir_print.c |  4 +++-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp   |  1 +
 6 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 
b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 157dc73..0f5c7e9 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1624,6 +1624,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
case nir_texop_tg4:
case nir_texop_query_levels:
case nir_texop_texture_samples:
+   case nir_texop_samples_identical:
compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op);
return;
}
@@ -1889,6 +1890,8 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr)
case nir_texop_query_levels:
emit_tex_query_levels(ctx, tex);
break;
+   case nir_texop_samples_identical:
+   unreachable("nir_texop_samples_identical");
default:
emit_tex(ctx, tex);
break;
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 6d24341..1cf3174 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -1798,6 +1798,11 @@ nir_visitor::visit(ir_texture *ir)
   num_srcs = 0;
   break;
 
+   case ir_samples_identical:
+  op = nir_texop_samples_identical;
+  num_srcs = 2; /* coordinate, sample_index (must be 0) */
+  break;
+
default:
   unreachable("not reached");
}
@@ -1825,6 +1830,7 @@ nir_visitor::visit(ir_texture *ir)
case GLSL_TYPE_INT:
   instr->dest_type = nir_type_int;
   break;
+   case GLSL_TYPE_BOOL:
case GLSL_TYPE_UINT:
   instr->dest_type = nir_type_unsigned;
   break;
@@ -1915,6 +1921,11 @@ nir_visitor::visit(ir_texture *ir)
   instr->component = ir->lod_info.component->as_constant()->value.u[0];
   break;
 
+   case ir_samples_identical:
+  instr->src[src_number].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+  instr->src[src_number].src_type = nir_tex_src_ms_index;
+  src_number++;
+
default:
   break;
}
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index beabcaf..02b3f61 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -963,6 +963,9 @@ typedef enum {
nir_texop_tg4,/**< Texture gather */
nir_texop_query_levels,   /**< Texture levels query */
nir_texop_texture_samples,/**< Texture samples query */
+   nir_texop_samples_identical,  /**< Query whether all samples are definitely
+  * identical.
+  */
 } nir_texop;
 
 typedef struct {
@@ -1036,6 +1039,7 @@ nir_tex_instr_dest_size(nir_tex_instr *instr)
 
case nir_texop_texture_samples:
case nir_texop_query_levels:
+   case nir_texop_samples_identical:
   return 1;
 
default:
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index f7f5fdf..fa2f321 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -512,7 +512,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
case nir_texop_texture_samples:
   fprintf(fp, "texture_samples ");
   break;
-
+   case nir_texop_samples_identical:
+  fprintf(fp, "samples_identical ");
+  break;
default:
   unreachable("Invalid texture operation");
   break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index c282f83..cdd470b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2658,6 +2658,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, 
nir_tex_instr *instr)
   inst->base_mrf = -1;
   return;
}
+   case nir_texop_samples_identical: op = ir_samples_identical; break;
default:
   unreachable("unknown texture opcode");
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 27933d7..3c2674d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1545,6 +1545,7 @@ ir_texture_opcode_for_nir_texop(nir_texop texop)
case

[Mesa-dev] [PATCH 8/8] i965: Enable EXT_shader_samples_identical

2015-11-18 Thread Ian Romanick

From: Ian Romanick 

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp   |  1 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 16 
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 +++
 src/mesa/drivers/dri/i965/intel_extensions.c   |  1 +
 5 files changed, 30 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 1f71f66..4af1234 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2550,6 +2550,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, 
nir_tex_instr *instr)
  switch (instr->op) {
  case nir_texop_txf:
  case nir_texop_txf_ms:
+ case nir_texop_samples_identical:
 coordinate = retype(src, BRW_REGISTER_TYPE_D);
 break;
  default:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index a7bd9ce..6688f6a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -259,6 +259,22 @@ fs_visitor::emit_texture(ir_texture_opcode op,
   lod = fs_reg(0u);
}
 
+   if (op == ir_samples_identical) {
+  fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 1, 1));
+
+  if (mcs.file == BRW_IMMEDIATE_VALUE) {
+ fs_reg tmp = vgrf(glsl_type::uint_type);
+
+ bld.MOV(tmp, mcs);
+ bld.CMP(dst, tmp, src_reg(0u), BRW_CONDITIONAL_EQ);
+  } else {
+ bld.CMP(dst, mcs, src_reg(0u), BRW_CONDITIONAL_EQ);
+  }
+
+  this->result = dst;
+  return;
+   }
+
if (coordinate.file != BAD_FILE) {
   /* FINISHME: Texture coordinate rescaling doesn't work with non-constant
* samplers.  This should only be a problem with GL_CLAMP on Gen7.
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 3c2674d..41c3c10 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1615,6 +1615,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
  switch (instr->op) {
  case nir_texop_txf:
  case nir_texop_txf_ms:
+ case nir_texop_samples_identical:
 coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D,
  src_size);
 coord_type = glsl_type::ivec(src_size);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index fda3d7c..2190a86 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -909,6 +909,17 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
   unreachable("TXB is not valid for vertex shaders.");
case ir_lod:
   unreachable("LOD is not valid for vertex shaders.");
+   case ir_samples_identical: {
+  if (mcs.file == BRW_IMMEDIATE_VALUE) {
+ const src_reg temp = src_reg(this, glsl_type::uint_type);
+
+ emit(MOV(dst_reg(temp), mcs));
+ emit(CMP(dest, temp, src_reg(0u), BRW_CONDITIONAL_EQ));
+  } else {
+ emit(CMP(dest, mcs, src_reg(0u), BRW_CONDITIONAL_EQ));
+  }
+  return;
+   }
default:
   unreachable("Unrecognized tex op");
}
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 386b63c..2e2459c 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -333,6 +333,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_texture_compression_bptc = true;
   ctx->Extensions.ARB_texture_view = true;
   ctx->Extensions.ARB_shader_storage_buffer_object = true;
+  ctx->Extensions.EXT_shader_samples_identical = true;
 
   if (can_do_pipelined_register_writes(brw)) {
  ctx->Extensions.ARB_draw_indirect = true;
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/8] Import current draft of EXT_shader_samples_identical spec

2015-11-18 Thread Ian Romanick

From: Ian Romanick 

Signed-off-by: Ian Romanick 
Cc: "Chris Forbes" 
---
 docs/specs/EXT_shader_samples_identical.txt | 174 
 1 file changed, 174 insertions(+)
 create mode 100644 docs/specs/EXT_shader_samples_identical.txt

diff --git a/docs/specs/EXT_shader_samples_identical.txt 
b/docs/specs/EXT_shader_samples_identical.txt
new file mode 100644
index 000..bae6c73
--- /dev/null
+++ b/docs/specs/EXT_shader_samples_identical.txt
@@ -0,0 +1,174 @@
+Name
+
+EXT_shader_samples_identical
+
+Name Strings
+
+GL_EXT_shader_samples_identical
+
+Contact
+
+Ian Romanick, Intel (ian.d.romanick 'at' intel.com)
+
+Contributors
+
+Chris Forbes, Mesa
+Magnus Wendt, Intel
+Graham Sellers, AMD
+
+Status
+
+XXX - Not complete yet.
+
+Version
+
+Last Modified Date: November 18, 2015
+Revision: 5
+
+Number
+
+TBD
+
+Dependencies
+
+OpenGL 3.2, or OpenGL ES 3.1, or ARB_texture_multisample is required.
+
+This extension is written against the OpenGL 4.5 (Core Profile)
+Specification
+
+Overview
+
+Multisampled antialiasing has become a common method for improving the
+quality of rendered images.  Multisampling differs from supersampling in
+that the color of a primitive that covers all or part of a pixel is
+resolved once, regardless of the number of samples covered.  If a large
+polygon is rendered, the colors of all samples in each interior pixel will
+be the same.  This suggests a simple compression scheme that can reduce
+the necessary memory bandwidth requirements.  In one such scheme, each
+sample is stored in a separate slice of the multisample surface.  An
+additional multisample control surface (MCS) contains a mapping from pixel
+samples to slices.
+
+If all the values stored in the MCS for a particular pixel are the same,
+then all the samples have the same value.  Applications can take advantage
+of this information to reduce the bandwidth of reading multisample
+textures.  A custom multisample resolve filter could optimize resolving
+pixels where every sample is identical by reading the color once.
+
+color = texelFetch(sampler, coordinate, 0);
+if (!textureSamplesIdenticalEXT(sampler, coordinate)) {
+for (int i = 1; i < MAX_SAMPLES; i++) {
+vec4 c = texelFetch(sampler, coordinate, i);
+
+//... accumulate c into color
+
+}
+}
+
+New Procedures and Functions
+
+None.
+
+New Tokens
+
+None.
+
+Additions to the OpenGL 4.5 (Core Profile) Specification
+
+None.
+
+Modifications to The OpenGL Shading Language Specification, Version 4.50.5
+
+Including the following line in a shader can be used to control the
+language features described in this extension:
+
+#extension GL_EXT_shader_samples_identical
+
+A new preprocessor #define is added to the OpenGL Shading Language:
+
+#define GL_EXT_shader_samples_identical
+
+Add to the table in section 8.7 "Texture Lookup Functions"
+
+Syntax:
+
+bool textureSamplesIdenticalEXT(gsampler2DMS sampler, ivec2 coord)
+
+bool textureSamplesIdenticalEXT(gsampler2DMSArray sampler,
+ivec3 coord)
+
+Description:
+
+Returns true if it can be determined that all samples within the texel
+of the multisample texture bound to <sampler> at <coord> contain the
+same values or false if this cannot be determined.
+
+Additions to the AGL/EGL/GLX/WGL Specifications
+
+None
+
+Errors
+
+None
+
+New State
+
+None
+
+New Implementation Dependent State
+
+None
+
+Issues
+
+1) What should the new functions be called?
+
+RESOLVED: textureSamplesIdenticalEXT.  Initially
+textureAllSamplesIdenticalEXT was considered, but
+textureSamplesIdenticalEXT is more similar to the existing textureSamples
+function.
+
+2) It seems like applications could implement additional optimization if
+   they were provided with raw MCS data.  Should this extension also
+   provide that data?
+
+There are a number of challenges in providing raw MCS data.  The biggest
+problem being that the amount of MCS data depends on the number of
+samples, and that is not known at compile time.  Additionally, without new
+texelFetch functions, applications would have difficulty utilizing the
+information.
+
+Another option is to have a function that returns an array of tuples of
+sample number and count.  This also has difficulties with the maximum
+array size not being known at compile time.
+
+RESOLVED: Do not expose raw MCS data in this extension.
+
+3) Should this extension also extend SPIR-V?
+
+RESOLVED: Yes, but this has not yet been written.
+
+4) Is it possible for textureSamplesIdenticalEXT to report false negatives?
+
+RESOLVED: Yes.  It is possible

[Mesa-dev] [PATCH 4/8] glsl: Add ir_samples_identical opcode

2015-11-18 Thread Ian Romanick

From: Ian Romanick 

Signed-off-by: Ian Romanick 
---
 src/glsl/ir.cpp|  6 +-
 src/glsl/ir.h  |  2 ++
 src/glsl/ir_clone.cpp  |  1 +
 src/glsl/ir_equals.cpp |  1 +
 src/glsl/ir_hv_accept.cpp  |  1 +
 src/glsl/ir_print_visitor.cpp  | 10 ++
 src/glsl/ir_rvalue_visitor.cpp |  1 +
 src/glsl/opt_tree_grafting.cpp |  1 +
 src/mesa/program/ir_to_mesa.cpp|  2 ++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  2 ++
 10 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 80cbdbf..f47100e 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1421,7 +1421,7 @@ ir_dereference::is_lvalue() const
 }
 
 
-static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", 
"txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples" };
+static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", 
"txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples", 
"samples_identical" };
 
 const char *ir_texture::opcode_string()
 {
@@ -1455,6 +1455,10 @@ ir_texture::set_sampler(ir_dereference *sampler, const 
glsl_type *type)
} else if (this->op == ir_lod) {
   assert(type->vector_elements == 2);
   assert(type->base_type == GLSL_TYPE_FLOAT);
+   } else if (this->op == ir_samples_identical) {
+  assert(type == glsl_type::bool_type);
+  assert(sampler->type->base_type == GLSL_TYPE_SAMPLER);
+  assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS);
} else {
   assert(sampler->type->sampler_type == (int) type->base_type);
   if (sampler->type->sampler_shadow)
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index d59dee1..39156e0 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1965,6 +1965,7 @@ enum ir_texture_opcode {
ir_tg4, /**< Texture gather */
ir_query_levels, /**< Texture levels query */
ir_texture_samples,  /**< Texture samples query */
+   ir_samples_identical, /**< Query whether all samples are definitely 
identical. */
 };
 
 
@@ -1991,6 +1992,7 @@ enum ir_texture_opcode {
  * (lod   )
  * (tg4 )
  * (query_levels  )
+ * (samples_identical  )
  */
 class ir_texture : public ir_rvalue {
 public:
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index a2cd672..4484cc9 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -223,6 +223,7 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) 
const
case ir_lod:
case ir_query_levels:
case ir_texture_samples:
+   case ir_samples_identical:
   break;
case ir_txb:
   new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht);
diff --git a/src/glsl/ir_equals.cpp b/src/glsl/ir_equals.cpp
index 5f0785e..f230b5d 100644
--- a/src/glsl/ir_equals.cpp
+++ b/src/glsl/ir_equals.cpp
@@ -152,6 +152,7 @@ ir_texture::equals(const ir_instruction *ir, enum 
ir_node_type ignore) const
case ir_lod:
case ir_query_levels:
case ir_texture_samples:
+   case ir_samples_identical:
   break;
case ir_txb:
   if (!lod_info.bias->equals(other->lod_info.bias, ignore))
diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp
index 6495cc4..213992a 100644
--- a/src/glsl/ir_hv_accept.cpp
+++ b/src/glsl/ir_hv_accept.cpp
@@ -195,6 +195,7 @@ ir_texture::accept(ir_hierarchical_visitor *v)
case ir_lod:
case ir_query_levels:
case ir_texture_samples:
+   case ir_samples_identical:
   break;
case ir_txb:
   s = this->lod_info.bias->accept(v);
diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
index 42b03fd..fd7bc2e 100644
--- a/src/glsl/ir_print_visitor.cpp
+++ b/src/glsl/ir_print_visitor.cpp
@@ -268,6 +268,14 @@ void ir_print_visitor::visit(ir_texture *ir)
 {
fprintf(f, "(%s ", ir->opcode_string());
 
+   if (ir->op == ir_samples_identical) {
+  ir->sampler->accept(this);
+  fprintf(f, " ");
+  ir->coordinate->accept(this);
+  fprintf(f, ")");
+  return;
+   }
+
print_type(f, ir->type);
fprintf(f, " ");
 
@@ -334,6 +342,8 @@ void ir_print_visitor::visit(ir_texture *ir)
case ir_tg4:
   ir->lod_info.component->accept(this);
   break;
+   case ir_samples_identical:
+  unreachable(!"ir_samples_identical was already handled");
};
fprintf(f, ")");
 }
diff --git a/src/glsl/ir_rvalue_visitor.cpp b/src/glsl/ir_rvalue_visitor.cpp
index a6966f5..6486838 100644
--- a/src/glsl/ir_rvalue_visitor.cpp
+++ b/src/glsl/ir_rvalue_visitor.cpp
@@ -59,6 +59,7 @@ ir_rvalue_base_visitor::rvalue_visit(ir_texture *ir)
case ir_lod:
case ir_query_levels:
case ir_texture_samples:
+   case ir_samples_identical:
   break;
case ir_txb:
   handle_rvalue(>lod_info.bias);
diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp

1 2 >

1 - 100 of 152 matches

Mail list logo