Re: [Mesa-dev] [RFC 4/6] i965: Implement INTEL_performance_query extension

2015-05-28 Thread Petri Latvala

On 05/06/2015 03:53 AM, Robert Bragg wrote:

+static struct brw_perf_query_counter gen7_pipeline_statistics[] = {
+
+   STAT(IA_VERTICES_COUNT,   "N vertices submitted"),
+   STAT(IA_PRIMITIVES_COUNT, "N primitives submitted"),
+   STAT(VS_INVOCATION_COUNT, "N vertex shader invocations"),
+   STAT(HS_INVOCATION_COUNT, "N hull shader invocations"),
+   STAT(DS_INVOCATION_COUNT, "N domain shader invocations"),
+   STAT(GS_INVOCATION_COUNT, "N geometry shader invocations"),
+   STAT(GS_PRIMITIVES_COUNT, "N geometry shader primitives emitted"),
+   STAT(CL_INVOCATION_COUNT, "N primitives entering clipping"),
+   STAT(CL_PRIMITIVES_COUNT, "N primitives leaving clipping"),
+
+   /* Implement the "WaDividePSInvocationCountBy4:HSW,BDW" workaround:
+* "Invocation counter is 4 times actual.  WA: SW to divide HW reported
+*  PS Invocations value by 4."
+*
+* Prior to Haswell, invocation count was counted by the WM, and it
+* buggily counted invocations in units of subspans (2x2 unit). To get the
+* correct value, the CS multiplied this by 4. With HSW the logic moved,
+* and correctly emitted the number of pixel shader invocations, but,
+* whomever forgot to undo the multiply by 4.
+*/
+   SCALED_STAT(PS_INVOCATION_COUNT, 1, 4, "N fragment shader invocations"),
+
+   STAT(PS_DEPTH_COUNT,  "N z-pass fragments"),
+
+   NAMED_STAT(GEN7_SO_PRIM_STORAGE_NEEDED(0), "SO_NUM_PRIMS_WRITTEN (Stream 
0)",
+  "N stream-out (stream 0) primitives (total)"),
+   NAMED_STAT(GEN7_SO_PRIM_STORAGE_NEEDED(1), "SO_NUM_PRIMS_WRITTEN (Stream 
1)",
+  "N stream-out (stream 1) primitives (total)"),
+   NAMED_STAT(GEN7_SO_PRIM_STORAGE_NEEDED(2), "SO_NUM_PRIMS_WRITTEN (Stream 
2)",
+  "N stream-out (stream 2) primitives (total)"),
+   NAMED_STAT(GEN7_SO_PRIM_STORAGE_NEEDED(3), "SO_NUM_PRIMS_WRITTEN (Stream 
3)",
+  "N stream-out (stream 3) primitives (total)"),
+   NAMED_STAT(GEN7_SO_NUM_PRIMS_WRITTEN(0), "SO_NUM_PRIMS_WRITTEN (Stream 0)",
+  "N stream-out (stream 0) primitives (written)"),
+   NAMED_STAT(GEN7_SO_NUM_PRIMS_WRITTEN(1), "SO_NUM_PRIMS_WRITTEN (Stream 1)",
+  "N stream-out (stream 1) primitives (written)"),
+   NAMED_STAT(GEN7_SO_NUM_PRIMS_WRITTEN(2), "SO_NUM_PRIMS_WRITTEN (Stream 2)",
+  "N stream-out (stream 2) primitives (written)"),
+   NAMED_STAT(GEN7_SO_NUM_PRIMS_WRITTEN(3), "SO_NUM_PRIMS_WRITTEN (Stream 3)",
+  "N stream-out (stream 3) primitives (written)"),
+};
+


Copy-paste error? SO_PRIM_STORAGE_NEEDED gets reported as 
SO_NUM_PRIMS_WRITTEN.



--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 12/15] nv50/ir: optimize the use of std::tr1::unordered_set

2015-05-20 Thread Petri Latvala

On 05/20/2015 06:30 AM, Ilia Mirkin wrote:

+typedef const iterator const_iterator;
+


This at least is wrong. A const iterator allows *iter = val, a 
const_iterator doesn't. A const_iterator allows ++, a const iterator 
doesn't.




--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 01/16] glsl: Add tracking for GLSL precision qualifiers

2015-05-18 Thread Petri Latvala

On 05/15/2015 12:39 PM, Topi Pohjolainen wrote:

diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index 5645dcd..25c4d30 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -100,6 +100,13 @@ enum glsl_matrix_layout {
 GLSL_MATRIX_LAYOUT_ROW_MAJOR
  };
  
+enum {

+   GLSL_PRECISION_NONE = 0,
+   GLSL_PRECISION_HIGH,
+   GLSL_PRECISION_MEDIUM,
+   GLSL_PRECISION_LOW
+};
+
  #ifdef __cplusplus
  #include "GL/gl.h"
  #include "util/ralloc.h"
@@ -768,6 +775,11 @@ struct glsl_struct_field {
  * streams (as in ir_variable::stream). -1 otherwise.
  */
 int stream;
+
+   /**
+* Precission qualifier
+*/
+   unsigned precision;
  };


Typo in the comment, "Precision".

Is there a piglit test for uniforms with different precisions? That 
should be mentioned in the commit log imho.



--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 9/9] mark GL_ARB_framebuffer_no_attachments as done for i965

2015-04-29 Thread Petri Latvala

On 04/29/2015 11:56 AM, kevin.rogo...@intel.com wrote:

From: Kevin Rogovin 

Mark GL_ARB_framebuffer_no_attachments as done for i965.

---
  docs/GL3.txt  | 2 +-
  docs/relnotes/10.6.0.html | 1 +
  2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 172fd3c..cf3b5a2 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -157,7 +157,7 @@ GL 4.3, GLSL 4.30:
GL_KHR_debug DONE (all drivers)
GL_ARB_explicit_uniform_location DONE (all drivers that 
support GLSL)
GL_ARB_fragment_layer_viewport   DONE (nv50, nvc0, 
r600, llvmpipe)
-  GL_ARB_framebuffer_no_attachmentsnot started
+  GL_ARB_framebuffer_no_attachmentsDONE (i965)
GL_ARB_internalformat_query2 not started
GL_ARB_invalidate_subdataDONE (all drivers)
GL_ARB_multi_draw_indirect   DONE (i965, nvc0, 
r600, radeonsi, llvmpipe, softpipe)


At the bottom is another block with GLES 3.1 requirements, which also 
contains GL_ARB_f_n_a.




--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] mesa: add support for exposing up to GL4.2

2015-04-24 Thread Petri Latvala

On 04/23/2015 07:28 PM, Ilia Mirkin wrote:

Add the 4.0/4.1/4.2 extensions lists to compute_version. A coule of
extensions aren't in mesa yet, so those are marked with 0 until they
become supported.

Signed-off-by: Ilia Mirkin 



coule -> couple.


--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Precision of float and trigonometric functions on i965

2014-12-11 Thread Petri Latvala

On 12/11/2014 11:59 AM, Iago Toral wrote:

That said, I also noticed that most of the errors reported are for
fairly big numbers, so I played a bit with some examples and noticed
that trigonometric functions lose more precision as their argument gets
bigger. If I pass arguments of a few thousand radians to sin() or cos()
I usually get results that are off by >0.1 and for many values over
15000 radians I get completely bogus results, off by more than 0.5 of
even 1.0 in some cases. Some examples:

Angle in radians   | sin() result | Expected |  Error   |
13000.0| 0.05308(...) | 0.08947(...) | ~ 0.036  |
14000.0| 0.85559(...) | 0.87388(...) | ~ 0.018  |
15000.0| 0.00018(...) | 0.89324(...) | ~ 0.893  |
16000.0| 0.82698(...) | 0.13100(...) | ~ 0.696  |
24500.0| 0.0(...) | 0.95833(...) | ~ 0.958  |

I suppose this is a known issue, right? Also, considering that sin() is
implemented as a single Math hardware instruction I imagine there is
little that software can do to correct this in any case...



According to the hw specs, sin and cos absolute error is <= 0.0008, but 
only for the range of +/- 100 * pi.



--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Fix an off-by-1 error in the draw upload code's size calculation.

2014-10-24 Thread Petri Latvala

On 10/16/2014 05:01 AM, Ian Romanick wrote:


That seems very likely.  This may be an optimization / work-around to
enable via driconf.  If two games make that mistake, you can be sure
there are others. :(



Some data points:

Dungeon Defenders is another Unreal Engine 3 game, but it doesn't give 
this perf warning.


Natural Selection 2 gives perf warnings that are similar to this case. 
Haven't tested with this patch, unknown whether it's the same issue 
there. Maybe Eero can test the patch with NS2?



--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 9/9] glsl: Add tests for minmax prune

2014-08-18 Thread Petri Latvala

On 08/13/2014 01:59 AM, Dylan Baker wrote:

On Tuesday, July 29, 2014 12:36:39 PM Petri Latvala wrote:

tests/minmax/create_test_cases.py generates the following tests:

multiple_min*.opt_test:
  Construct a tree of min expressions for all permutations of a var_ref
  and three constants. They should all optimize to a single min with
  the variable and the smallest constant.
multiple_max*.opt_test:
  Same as above, for max.
mid3opt*.opt_test:
  Test that code generated from a mid3() for two constants and a
  var_ref optimizes to a single max and a single min.
mixed_vectors*.opt_test:
  Test that the optimization pass doesn't modify expression trees with
  constant vectors where some components compare as less, some as
  greater.

Signed-off-by: Petri Latvala 
---
  src/glsl/tests/minmax/.gitignore   |   3 +
  src/glsl/tests/minmax/create_test_cases.py | 151 +
  2 files changed, 154 insertions(+)
  create mode 100644 src/glsl/tests/minmax/.gitignore
  create mode 100644 src/glsl/tests/minmax/create_test_cases.py

diff --git a/src/glsl/tests/minmax/.gitignore b/src/glsl/tests/minmax/.gitignore
new file mode 100644
index 000..e98df62
--- /dev/null
+++ b/src/glsl/tests/minmax/.gitignore
@@ -0,0 +1,3 @@
+*.opt_test
+*.expected
+*.out
diff --git a/src/glsl/tests/minmax/create_test_cases.py 
b/src/glsl/tests/minmax/create_test_cases.py
new file mode 100644
index 000..4f78980
--- /dev/null
+++ b/src/glsl/tests/minmax/create_test_cases.py
@@ -0,0 +1,151 @@
+# coding=utf-8
+#
+# Copyright © 2014 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+import os
+import os.path
+import re
+import subprocess
+import sys
+import itertools

This comment applies to all the patches.
You're importing a bunch of modules you're not using, you should remove
any that are not used.

In this file os.path, re, and subprocess are not used.


Oh, yes, leftovers from the refactoring. Fix inc.


+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+from sexps import *
+from test_case_generator import *
+
+def test_multiple_max():
+doc_string = """Test that multiple constants in multiple max expressions are reduced to a 
single max."""

What is this? If it's a docstring it's not assigned, it's just a triple
quoted string at the start of the function or class. Fix this for the
other functions as well


I followed the convention from the jump lowering tests. It's the string 
printed to the generated test script. I'll fix the single-line strings 
to normal quotes.



+
+operands = [const_float(1),
+const_float(2),
+const_float(3),
+['var_ref', 'a']]
+
+c = 1
+for ops in itertools.permutations(operands):
+maxtree1 = reduce(lambda a, b: max_(a, b, 'float'), ops)
+maxtree2 = reduce(lambda a, b: max_(b, a, 'float'), ops)
+
+expected = max_(const_float(3), ['var_ref', 'a'], 'float')
+
+input_sexp = make_test_case('main', 'void', (
+assign_x('b', maxtree1) +
+assign_x('c', maxtree2)
+))
+expected_sexp = make_test_case('main', 'void', (
+assign_x('b', expected) +
+assign_x('c', expected)
+))
+
+create_test_case(doc_string, input_sexp, expected_sexp, 
'multiple_max{0}'.format(c), 'do_minmax_prune')
+c += 1
+
+def test_multiple_min():
+doc_string = """Test that multiple constants in multiple min expressions are reduced to a 
single min."""
+
+operands = [const_float(1),
+const_float(2),
+  

Re: [Mesa-dev] [PATCH 3/9] glsl: Refactor the python test case generator

2014-08-18 Thread Petri Latvala

On 08/13/2014 01:51 AM, Dylan Baker wrote:

On Tuesday, July 29, 2014 12:36:33 PM Petri Latvala wrote:

Move the IR sexp builder helpers and test script creation parts of
tests/lower_jumps/create_test_cases.py into tests/test_case_generator.py

No functional changes.

Signed-off-by: Petri Latvala 
---
  src/glsl/tests/lower_jumps/create_test_cases.py | 336 +++-
  src/glsl/tests/test_case_generator.py   | 293 +
  2 files changed, 334 insertions(+), 295 deletions(-)
  create mode 100644 src/glsl/tests/test_case_generator.py

diff --git a/src/glsl/tests/lower_jumps/create_test_cases.py 
b/src/glsl/tests/lower_jumps/create_test_cases.py
index 3be1079..9783627 100644
--- a/src/glsl/tests/lower_jumps/create_test_cases.py
+++ b/src/glsl/tests/lower_jumps/create_test_cases.py
@@ -27,278 +27,9 @@ import re
  import subprocess
  import sys
  
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access to sexps.py, which is in parent dir

+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access 
to sexps.py and test_case_generator.py, which are in parent dir
  from sexps import *
-
-def make_test_case(f_name, ret_type, body):
-"""Create a simple optimization test case consisting of a single
-function with the given name, return type, and body.
-
-Global declarations are automatically created for any undeclared
-variables that are referenced by the function.  All undeclared
-variables are assumed to be floats.
-"""
-check_sexp(body)
-declarations = {}
-def make_declarations(sexp, already_declared = ()):
-if isinstance(sexp, list):
-if len(sexp) == 2 and sexp[0] == 'var_ref':
-if sexp[1] not in already_declared:
-declarations[sexp[1]] = [
-'declare', ['in'], 'float', sexp[1]]
-elif len(sexp) == 4 and sexp[0] == 'assign':
-assert sexp[2][0] == 'var_ref'
-if sexp[2][1] not in already_declared:
-declarations[sexp[2][1]] = [
-'declare', ['out'], 'float', sexp[2][1]]
-make_declarations(sexp[3], already_declared)
-else:
-already_declared = set(already_declared)
-for s in sexp:
-if isinstance(s, list) and len(s) >= 4 and \
-s[0] == 'declare':
-already_declared.add(s[3])
-else:
-make_declarations(s, already_declared)
-make_declarations(body)
-return declarations.values() + \
-[['function', f_name, ['signature', ret_type, ['parameters'], body]]]
-
-
-# The following functions can be used to build expressions.
-
-def const_float(value):
-"""Create an expression representing the given floating point value."""
-return ['constant', 'float', ['{0:.6f}'.format(value)]]
-
-def const_bool(value):
-"""Create an expression representing the given boolean value.
-
-If value is not a boolean, it is converted to a boolean.  So, for
-instance, const_bool(1) is equivalent to const_bool(True).
-"""
-return ['constant', 'bool', ['{0}'.format(1 if value else 0)]]
-
-def gt_zero(var_name):
-"""Create Construct the expression var_name > 0"""
-return ['expression', 'bool', '>', ['var_ref', var_name], const_float(0)]
-
-
-# The following functions can be used to build complex control flow
-# statements.  All of these functions return statement lists (even
-# those which only create a single statement), so that statements can
-# be sequenced together using the '+' operator.
-
-def return_(value = None):
-"""Create a return statement."""
-if value is not None:
-return [['return', value]]
-else:
-return [['return']]
-
-def break_():
-"""Create a break statement."""
-return ['break']
-
-def continue_():
-"""Create a continue statement."""
-return ['continue']
-
-def simple_if(var_name, then_statements, else_statements = None):
-"""Create a statement of the form
-
-if (var_name > 0.0) {
-   
-} else {
-   
-}
-
-else_statements may be omitted.
-"""
-if else_statements is None:
-else_statements = []
-check_sexp(then_statements)
-check_sexp(else_statements)
-return [['if', gt_zero(var

Re: [Mesa-dev] [PATCH] squash! glsl: Optimize min/max expression trees

2014-08-18 Thread Petri Latvala

On 08/14/2014 11:00 AM, Connor Abbott wrote:



Another thing I'd like to see is to change minmax_range to call things
"low" and "high" instead of "range[0]" and "range[1]." This helps
readability, and the tricks with indirect addressing that having an
array lets you do are things we really shouldn't be doing anyways
because it's hard to follow.


Sure, changing.



As I mentioned before, swizzle_if_required() should probably use the
ir_builder swizzle helpers.


I copied swizzle_if_required from opt_algebraic. I'll squeeze in a patch 
that changes that as well. Or actually just refactor the function to 
live somewhere where it's reusable.





I'm still not convinced that the algorithm is the best way to go about
it. Right now, AFAICT, we do something like:

- Pass in a "base range," which is what the min's and max's above us
in the tree will clamp the value we return to
- Get the ranges for each subexpression (this is a recursive call)
- Check and see if each operand is unnecessary (i.e. its range is
strictly greater than the base range or strictly greater than the
other argument for mins, the other way around for max's)

As another thing, the logic for this part could be made a *lot*
clearer by rearranging the code and commenting. I'd do something like:

bool is_redundant = false /* whether this operand will never affect
the final value of the min-max tree */

if (is_min) {
/* if this operand will always be greater than the other one, it's
redundant */
if (limit[i].low > limit[1 - i].high)
   is_redundant = true;

/* if this operand is always greater than baserange, then even if
it's smaller than the other one it'll get clamped so it's redundant */
if (limit[i].low > baserange.high)
   is_redundant = true;
} else {
... the exact same logic mirrored ...
}

- Recurse into the subexpressions, computing the new baserange.

What I think we should do instead is change prune_expression() to also
return the range for the expression (it's now returning two things, so
one would have to be passed via a class variable), so it would look
like:

- Pass in the base range
- If this is a constant, return ourself and the range with low == high
- Recurse into both subexpressions, setting both the range (limits[i])
and the new subexpression
- If one of the subexpressions is redundant, return the other
subexpression and its range
- Otherwise, return ourself and the combination of the ranges

This will allow us to do the recursion only once, instead of once in
get_range() and once in prune_expression(), which will make things
simpler and faster.



You mean have only prune_expression(), cut out get_range()?

I tried hard to have this recurse only once and it looks impossible to 
me. Consider this (hopefully this ascii art gets through fine):


 max
  /   \
 max max
/   \   /   \
   3a   b2

(If ascii art failed, it'smax(max(3, a), max(b, 2)) )

a and b are variables, 2 and 3 constants. 2 is to be dropped from the 
right subtree of the top max, but for that we need the 3 from the left 
subtree. prune_expression() on the left subtree will get us the 3 as the 
limit, which correctly drops the 2 when recursed to the right subtree. 
What about if 3 and 2 are swapped in the tree?


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] squash! glsl: Optimize min/max expression trees

2014-08-18 Thread Petri Latvala

On 08/14/2014 07:04 AM, Matt Turner wrote:

---
I'd squash this in at minimum. The changes are

  - Whitespace
  - Removal of unnecessary destructor
  - Renaming "one" and "two" to "a" and "b" (one->value.u[c0] < 
two->value.u[c0]...)
  - continue -> break
  - assert(!...) -> unreachable
  - Not doing assignments in if conditionals
  - Marking swizzle_if_required as static


Thanks, I'll squash this in.


I also think less_all_components should just return an enum like
{ MIXED, EQUAL, LESS, GREATER }, rather than setting a variable in
the class. It, as well as smaller/larger_constant, can then be
static functions outside of the visitor.

Yes, I'll try what it looks like with that.


I think the algorithm itself looks correct.

  src/glsl/opt_minmax.cpp | 145 +---
  1 file changed, 63 insertions(+), 82 deletions(-)

diff --git a/src/glsl/opt_minmax.cpp b/src/glsl/opt_minmax.cpp
index 5656059..b987386 100644
--- a/src/glsl/opt_minmax.cpp
+++ b/src/glsl/opt_minmax.cpp
@@ -37,12 +37,10 @@
  #include "glsl_types.h"
  #include "main/macros.h"
  
-namespace

-{
-class minmax_range
-{
-public:
+namespace {
  
+class minmax_range {

+public:
 minmax_range(ir_constant *low = NULL, ir_constant *high = NULL)
 {
range[0] = low;
@@ -60,60 +58,45 @@ public:
  class ir_minmax_visitor : public ir_rvalue_enter_visitor {
  public:
 ir_minmax_visitor()
-  : progress(false)
-  , valid(true)
-   {
-   }
-
-   virtual ~ir_minmax_visitor()
+  : progress(false), valid(true)
 {
 }
  
-   bool

-   less_all_components(ir_constant *one, ir_constant *two);
-
-   ir_constant *
-   smaller_constant(ir_constant *one, ir_constant *two);
-
-   ir_constant *
-   larger_constant(ir_constant *one, ir_constant *two);
+   bool less_all_components(ir_constant *a, ir_constant *b);
+   ir_constant *smaller_constant(ir_constant *a, ir_constant *b);
+   ir_constant *larger_constant(ir_constant *a, ir_constant *b);
  
-   minmax_range

-   combine_range(minmax_range r0, minmax_range r1, bool ismin);
+   minmax_range combine_range(minmax_range r0, minmax_range r1, bool ismin);
  
-   minmax_range

-   range_intersection(minmax_range r0, minmax_range r1);
+   minmax_range range_intersection(minmax_range r0, minmax_range r1);
  
-   minmax_range

-   get_range(ir_rvalue *rval);
+   minmax_range get_range(ir_rvalue *rval);
  
-   ir_rvalue *

-   prune_expression(ir_expression *expr, minmax_range baserange);
+   ir_rvalue *prune_expression(ir_expression *expr, minmax_range baserange);
  
-   void

-   handle_rvalue(ir_rvalue **rvalue);
+   void handle_rvalue(ir_rvalue **rvalue);
  
 bool progress;

 bool valid;
  };
  
  /*

- * Returns true if all vector components of `one' are less than of `two'.
+ * Returns true if all vector components of `a' are less than of `b'.
   *
   * If there are vector components that are less while others are greater, the
   * visitor is marked invalid and no further changes will be made to the IR.
   */
  bool
-ir_minmax_visitor::less_all_components(ir_constant *one, ir_constant *two)
+ir_minmax_visitor::less_all_components(ir_constant *a, ir_constant *b)
  {
-   assert(one != NULL);
-   assert(two != NULL);
+   assert(a != NULL);
+   assert(b != NULL);
  
-   assert(one->type->base_type == two->type->base_type);

+   assert(a->type->base_type == b->type->base_type);
  
-   unsigned oneinc = one->type->is_scalar() ? 0 : 1;

-   unsigned twoinc = two->type->is_scalar() ? 0 : 1;
-   unsigned components = MAX2(one->type->components(), 
two->type->components());
+   unsigned a_inc = a->type->is_scalar() ? 0 : 1;
+   unsigned b_inc = b->type->is_scalar() ? 0 : 1;
+   unsigned components = MAX2(a->type->components(), b->type->components());
  
 /* No early escape. We need to go through all components and mark the

  * visitor as invalid if comparison yields less for some components and
@@ -127,34 +110,34 @@ ir_minmax_visitor::less_all_components(ir_constant *one, 
ir_constant *two)
  
 for (unsigned i = 0, c0 = 0, c1 = 0;

  i < components;
-c0 += oneinc, c1 += twoinc, ++i) {
-  switch (one->type->base_type) {
+c0 += a_inc, c1 += b_inc, ++i) {
+  switch (a->type->base_type) {
case GLSL_TYPE_UINT:
- if (one->value.u[c0] < two->value.u[c1])
+ if (a->value.u[c0] < b->value.u[c1])
  foundless = true;
- else if (one->value.u[c0] > two->value.u[c1])
+ else if (a->value.u[c0] > b->value.u[c1])
  foundgreater = true;
   else
  foundequal = true;
- continue;
+ break;
case GLSL_TYPE_INT:
- if (one->value.i[c0] < two->value.i[c1])
+ if (a->value.i[c0] < b->value.i[c1])
  foundless = true;
- else if (one->value.i[c0] > two->value.i[c1])
+ else if (a->value.i[c0] > b->value.i[c1])
  foundgreater = true;
   else
  foundequal = t

Re: [Mesa-dev] [PATCH 1/9] glsl: Optimize min/max expression trees

2014-08-18 Thread Petri Latvala

On 08/14/2014 04:33 AM, Ian Romanick wrote:

On 07/29/2014 02:36 AM, Petri Latvala wrote:

Add an optimization pass that drops min/max expression operands that
can be proven to not contribute to the final result. The algorithm is
similar to alpha-beta pruning on a minmax search, from the field of
AI.

This optimization pass can optimize min/max expressions where operands
are min/max expressions. Such code can appear in shaders by itself, or
as the result of clamp() or AMD_shader_trinary_minmax functions.

This optimization pass improves the generated code for piglit's
AMD_shader_trinary_minmax tests as follows:

total instructions in shared programs: 75 -> 67 (-10.67%)
instructions in affected programs: 60 -> 52 (-13.33%)
GAINED:0
LOST:  0

All tests (max3, min3, mid3) improved.

And I assume no piglit regressions?


Indeed no regressions, or new successes. I wrote that in the cover 
letter, I should have written it also in this patch's commit message...




Also... have you tried this in combination with Abdiel's related work on
saturates?


Tested the combination now, after some fighting with shader-db. The 
results are the same, except :

One shader from
Dungeon Defenders is hurt by shader-db metrics (26 -> 28), because of
dropping of a (constant float (0.0)) operand, which was
compiled to a saturate modifier.


This shader compiled into the same code with or without my patches.

Talked with Abdiel about the combination, recapping here: Our changes 
are orthogonal and not conflicting, so we can both proceed at our own paces.




Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76861
Signed-off-by: Petri Latvala 
---
  src/glsl/Makefile.sources   |   1 +
  src/glsl/glsl_parser_extras.cpp |   1 +
  src/glsl/ir_optimization.h  |   1 +
  src/glsl/opt_minmax.cpp | 395 
  4 files changed, 398 insertions(+)
  create mode 100644 src/glsl/opt_minmax.cpp

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index b54eae7..1ee80a3 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -95,6 +95,7 @@ LIBGLSL_FILES = \
$(GLSL_SRCDIR)/opt_flip_matrices.cpp \
$(GLSL_SRCDIR)/opt_function_inlining.cpp \
$(GLSL_SRCDIR)/opt_if_simplification.cpp \
+   $(GLSL_SRCDIR)/opt_minmax.cpp \
$(GLSL_SRCDIR)/opt_noop_swizzle.cpp \
$(GLSL_SRCDIR)/opt_rebalance_tree.cpp \
$(GLSL_SRCDIR)/opt_redundant_jumps.cpp \
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 890123a..9f57ef3 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -1561,6 +1561,7 @@ do_common_optimization(exec_list *ir, bool linked,
 else
progress = do_constant_variable_unlinked(ir) || progress;
 progress = do_constant_folding(ir) || progress;
+   progress = do_minmax_prune(ir) || progress;
 progress = do_cse(ir) || progress;
 progress = do_rebalance_tree(ir) || progress;
 progress = do_algebraic(ir, native_integers, options) || progress;
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index b83c225..9d22585 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -98,6 +98,7 @@ bool opt_flatten_nested_if_blocks(exec_list *instructions);
  bool do_discard_simplification(exec_list *instructions);
  bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0);
  bool do_mat_op_to_vec(exec_list *instructions);
+bool do_minmax_prune(exec_list *instructions);
  bool do_noop_swizzle(exec_list *instructions);
  bool do_structure_splitting(exec_list *instructions);
  bool do_swizzle_swizzle(exec_list *instructions);
diff --git a/src/glsl/opt_minmax.cpp b/src/glsl/opt_minmax.cpp
new file mode 100644
index 000..5656059
--- /dev/null
+++ b/src/glsl/opt_minmax.cpp
@@ -0,0 +1,395 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABI

[Mesa-dev] [PATCH 1/9] glsl: Optimize min/max expression trees

2014-07-29 Thread Petri Latvala
Add an optimization pass that drops min/max expression operands that
can be proven to not contribute to the final result. The algorithm is
similar to alpha-beta pruning on a minmax search, from the field of
AI.

This optimization pass can optimize min/max expressions where operands
are min/max expressions. Such code can appear in shaders by itself, or
as the result of clamp() or AMD_shader_trinary_minmax functions.

This optimization pass improves the generated code for piglit's
AMD_shader_trinary_minmax tests as follows:

total instructions in shared programs: 75 -> 67 (-10.67%)
instructions in affected programs: 60 -> 52 (-13.33%)
GAINED:0
LOST:  0

All tests (max3, min3, mid3) improved.

A full shader-db run:

total instructions in shared programs: 4293603 -> 4293575 (-0.00%)
instructions in affected programs: 1188 -> 1160 (-2.36%)
GAINED:0
LOST:  0

Improvements happen in Guacamelee and Serious Sam 3. One shader from
Dungeon Defenders is hurt by shader-db metrics (26 -> 28), because of
dropping of a (constant float (0.0)) operand, which was
compiled to a saturate modifier.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76861
Signed-off-by: Petri Latvala 
---
 src/glsl/Makefile.sources   |   1 +
 src/glsl/glsl_parser_extras.cpp |   1 +
 src/glsl/ir_optimization.h  |   1 +
 src/glsl/opt_minmax.cpp | 395 
 4 files changed, 398 insertions(+)
 create mode 100644 src/glsl/opt_minmax.cpp

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index b54eae7..1ee80a3 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -95,6 +95,7 @@ LIBGLSL_FILES = \
$(GLSL_SRCDIR)/opt_flip_matrices.cpp \
$(GLSL_SRCDIR)/opt_function_inlining.cpp \
$(GLSL_SRCDIR)/opt_if_simplification.cpp \
+   $(GLSL_SRCDIR)/opt_minmax.cpp \
$(GLSL_SRCDIR)/opt_noop_swizzle.cpp \
$(GLSL_SRCDIR)/opt_rebalance_tree.cpp \
$(GLSL_SRCDIR)/opt_redundant_jumps.cpp \
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 890123a..9f57ef3 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -1561,6 +1561,7 @@ do_common_optimization(exec_list *ir, bool linked,
else
   progress = do_constant_variable_unlinked(ir) || progress;
progress = do_constant_folding(ir) || progress;
+   progress = do_minmax_prune(ir) || progress;
progress = do_cse(ir) || progress;
progress = do_rebalance_tree(ir) || progress;
progress = do_algebraic(ir, native_integers, options) || progress;
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index b83c225..9d22585 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -98,6 +98,7 @@ bool opt_flatten_nested_if_blocks(exec_list *instructions);
 bool do_discard_simplification(exec_list *instructions);
 bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0);
 bool do_mat_op_to_vec(exec_list *instructions);
+bool do_minmax_prune(exec_list *instructions);
 bool do_noop_swizzle(exec_list *instructions);
 bool do_structure_splitting(exec_list *instructions);
 bool do_swizzle_swizzle(exec_list *instructions);
diff --git a/src/glsl/opt_minmax.cpp b/src/glsl/opt_minmax.cpp
new file mode 100644
index 000..5656059
--- /dev/null
+++ b/src/glsl/opt_minmax.cpp
@@ -0,0 +1,395 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file opt_minmax.cpp
+ *
+ * Drop operands from an expression tree of only min/max operations if they
+ * can be proven to not contribute to the final result.
+ *
+ * The algorithm is similar to alpha-beta pruning on a minmax s

[Mesa-dev] [PATCH 9/9] glsl: Add tests for minmax prune

2014-07-29 Thread Petri Latvala
tests/minmax/create_test_cases.py generates the following tests:

multiple_min*.opt_test:
 Construct a tree of min expressions for all permutations of a var_ref
 and three constants. They should all optimize to a single min with
 the variable and the smallest constant.
multiple_max*.opt_test:
 Same as above, for max.
mid3opt*.opt_test:
 Test that code generated from a mid3() for two constants and a
 var_ref optimizes to a single max and a single min.
mixed_vectors*.opt_test:
 Test that the optimization pass doesn't modify expression trees with
 constant vectors where some components compare as less, some as
 greater.

Signed-off-by: Petri Latvala 
---
 src/glsl/tests/minmax/.gitignore   |   3 +
 src/glsl/tests/minmax/create_test_cases.py | 151 +
 2 files changed, 154 insertions(+)
 create mode 100644 src/glsl/tests/minmax/.gitignore
 create mode 100644 src/glsl/tests/minmax/create_test_cases.py

diff --git a/src/glsl/tests/minmax/.gitignore b/src/glsl/tests/minmax/.gitignore
new file mode 100644
index 000..e98df62
--- /dev/null
+++ b/src/glsl/tests/minmax/.gitignore
@@ -0,0 +1,3 @@
+*.opt_test
+*.expected
+*.out
diff --git a/src/glsl/tests/minmax/create_test_cases.py 
b/src/glsl/tests/minmax/create_test_cases.py
new file mode 100644
index 000..4f78980
--- /dev/null
+++ b/src/glsl/tests/minmax/create_test_cases.py
@@ -0,0 +1,151 @@
+# coding=utf-8
+#
+# Copyright © 2014 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+import os
+import os.path
+import re
+import subprocess
+import sys
+import itertools
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+from sexps import *
+from test_case_generator import *
+
+def test_multiple_max():
+doc_string = """Test that multiple constants in multiple max expressions 
are reduced to a single max."""
+
+operands = [const_float(1),
+const_float(2),
+const_float(3),
+['var_ref', 'a']]
+
+c = 1
+for ops in itertools.permutations(operands):
+maxtree1 = reduce(lambda a, b: max_(a, b, 'float'), ops)
+maxtree2 = reduce(lambda a, b: max_(b, a, 'float'), ops)
+
+expected = max_(const_float(3), ['var_ref', 'a'], 'float')
+
+input_sexp = make_test_case('main', 'void', (
+assign_x('b', maxtree1) +
+assign_x('c', maxtree2)
+))
+expected_sexp = make_test_case('main', 'void', (
+assign_x('b', expected) +
+assign_x('c', expected)
+))
+
+create_test_case(doc_string, input_sexp, expected_sexp, 
'multiple_max{0}'.format(c), 'do_minmax_prune')
+c += 1
+
+def test_multiple_min():
+doc_string = """Test that multiple constants in multiple min expressions 
are reduced to a single min."""
+
+operands = [const_float(1),
+const_float(2),
+const_float(3),
+['var_ref', 'a']]
+
+c = 1
+for ops in itertools.permutations(operands):
+mintree1 = reduce(lambda a, b: min_(a, b, 'float'), ops)
+mintree2 = reduce(lambda a, b: min_(b, a, 'float'), ops)
+
+expected = min_(const_float(1), ['var_ref', 'a'], 'float')
+
+input_sexp = make_test_case('main', 'void', (
+assign_x('b', mintree1) +
+assign_x('c', mintree2)
+))
+expected_sexp = make_test_case('main', 'void', (
+assign_x('b', expe

[Mesa-dev] [PATCH 4/9] glsl: Make compare_ir sort expression operands for commutative operations

2014-07-29 Thread Petri Latvala
Sort expression operands when possible so that building expected IR
sexps doesn't need to know which ordering will be produced by an
optimization pass.

Signed-off-by: Petri Latvala 
---
 src/glsl/tests/compare_ir |  4 ++--
 src/glsl/tests/sexps.py   | 37 +
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/src/glsl/tests/compare_ir b/src/glsl/tests/compare_ir
index a40fc81..0b63fab 100755
--- a/src/glsl/tests/compare_ir
+++ b/src/glsl/tests/compare_ir
@@ -38,9 +38,9 @@ if len(sys.argv) != 3:
 exit(1)
 
 with open(sys.argv[1]) as f:
-ir1 = sort_decls(parse_sexp(f.read()))
+ir1 = sort_decls(sort_commutatives(parse_sexp(f.read(
 with open(sys.argv[2]) as f:
-ir2 = sort_decls(parse_sexp(f.read()))
+ir2 = sort_decls(sort_commutatives(parse_sexp(f.read(
 
 if ir1 == ir2:
 exit(0)
diff --git a/src/glsl/tests/sexps.py b/src/glsl/tests/sexps.py
index a714af8..60c54bd 100644
--- a/src/glsl/tests/sexps.py
+++ b/src/glsl/tests/sexps.py
@@ -101,3 +101,40 @@ def sort_decls(sexp):
 other_code.append(s)
 return sorted(decls) + other_code
 
+commutatives = [
+'+',
+'*',
+'imul_high',
+'carry',
+'==',
+'!=',
+'all_equal',
+'any_nequal',
+'&',
+'^',
+'|',
+'&&',
+'^^',
+'||',
+'dot',
+'min',
+'max'
+]
+
+def sort_commutatives(sexp):
+"""Sort operands of expressions that are commutative in sexp.
+
+This is used to work around the fact that optimization passes might
+reorder operands.
+"""
+if not isinstance(sexp, list): return sexp
+
+code = []
+for s in sexp:
+sd = sort_commutatives(s)
+# An expression is [expression, type, operation, operand1, ...]
+if isinstance(sd, list) and len(sd) >= 3 and sd[0] == 'expression' and 
sd[2] in commutatives:
+code.append(['expression', sd[1:2]] + sorted(sd[3:]))
+else:
+code.append(sd)
+return code
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/9] glsl: Refactor the python test case generator

2014-07-29 Thread Petri Latvala
Move the IR sexp builder helpers and test script creation parts of
tests/lower_jumps/create_test_cases.py into tests/test_case_generator.py

No functional changes.

Signed-off-by: Petri Latvala 
---
 src/glsl/tests/lower_jumps/create_test_cases.py | 336 +++-
 src/glsl/tests/test_case_generator.py   | 293 +
 2 files changed, 334 insertions(+), 295 deletions(-)
 create mode 100644 src/glsl/tests/test_case_generator.py

diff --git a/src/glsl/tests/lower_jumps/create_test_cases.py 
b/src/glsl/tests/lower_jumps/create_test_cases.py
index 3be1079..9783627 100644
--- a/src/glsl/tests/lower_jumps/create_test_cases.py
+++ b/src/glsl/tests/lower_jumps/create_test_cases.py
@@ -27,278 +27,9 @@ import re
 import subprocess
 import sys
 
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access 
to sexps.py, which is in parent dir
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access 
to sexps.py and test_case_generator.py, which are in parent dir
 from sexps import *
-
-def make_test_case(f_name, ret_type, body):
-"""Create a simple optimization test case consisting of a single
-function with the given name, return type, and body.
-
-Global declarations are automatically created for any undeclared
-variables that are referenced by the function.  All undeclared
-variables are assumed to be floats.
-"""
-check_sexp(body)
-declarations = {}
-def make_declarations(sexp, already_declared = ()):
-if isinstance(sexp, list):
-if len(sexp) == 2 and sexp[0] == 'var_ref':
-if sexp[1] not in already_declared:
-declarations[sexp[1]] = [
-'declare', ['in'], 'float', sexp[1]]
-elif len(sexp) == 4 and sexp[0] == 'assign':
-assert sexp[2][0] == 'var_ref'
-if sexp[2][1] not in already_declared:
-declarations[sexp[2][1]] = [
-'declare', ['out'], 'float', sexp[2][1]]
-make_declarations(sexp[3], already_declared)
-else:
-already_declared = set(already_declared)
-for s in sexp:
-if isinstance(s, list) and len(s) >= 4 and \
-s[0] == 'declare':
-already_declared.add(s[3])
-else:
-make_declarations(s, already_declared)
-make_declarations(body)
-return declarations.values() + \
-[['function', f_name, ['signature', ret_type, ['parameters'], body]]]
-
-
-# The following functions can be used to build expressions.
-
-def const_float(value):
-"""Create an expression representing the given floating point value."""
-return ['constant', 'float', ['{0:.6f}'.format(value)]]
-
-def const_bool(value):
-"""Create an expression representing the given boolean value.
-
-If value is not a boolean, it is converted to a boolean.  So, for
-instance, const_bool(1) is equivalent to const_bool(True).
-"""
-return ['constant', 'bool', ['{0}'.format(1 if value else 0)]]
-
-def gt_zero(var_name):
-"""Create Construct the expression var_name > 0"""
-return ['expression', 'bool', '>', ['var_ref', var_name], const_float(0)]
-
-
-# The following functions can be used to build complex control flow
-# statements.  All of these functions return statement lists (even
-# those which only create a single statement), so that statements can
-# be sequenced together using the '+' operator.
-
-def return_(value = None):
-"""Create a return statement."""
-if value is not None:
-return [['return', value]]
-else:
-return [['return']]
-
-def break_():
-"""Create a break statement."""
-return ['break']
-
-def continue_():
-"""Create a continue statement."""
-return ['continue']
-
-def simple_if(var_name, then_statements, else_statements = None):
-"""Create a statement of the form
-
-if (var_name > 0.0) {
-   
-} else {
-   
-}
-
-else_statements may be omitted.
-"""
-if else_statements is None:
-else_statements = []
-check_sexp(then_statements)
-check_sexp(else_statements)
-return [['if', gt_zero(var_name), then_statements, else_statements]]
-
-def loop(statements):
-"""Create a loop containing the g

[Mesa-dev] [PATCH 7/9] glsl: Add min and max IR sexp creation functions to test_case_generator.py

2014-07-29 Thread Petri Latvala
Signed-off-by: Petri Latvala 
---
 src/glsl/tests/test_case_generator.py | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/glsl/tests/test_case_generator.py 
b/src/glsl/tests/test_case_generator.py
index 6633702..5f550df 100644
--- a/src/glsl/tests/test_case_generator.py
+++ b/src/glsl/tests/test_case_generator.py
@@ -86,6 +86,21 @@ def gt_zero(var_name):
 """Create Construct the expression var_name > 0"""
 return ['expression', 'bool', '>', ['var_ref', var_name], const_float(0)]
 
+def min_(a, b, type):
+"""Create an expression min(a, b), of type 'type'"""
+return ['expression', type, 'min', a, b]
+
+def max_(a, b, type):
+"""Create an expression max(a, b), of type 'type'"""
+return ['expression', type, 'max', a, b]
+
+def minf(a, b):
+"""Helper function for min_(a, b, 'float')"""
+return min_(a, b, 'float')
+
+def maxf(a, b):
+"""Helper function for max_(a, b, 'float')"""
+return max_(a, b, 'float')
 
 # The following functions can be used to build complex control flow
 # statements.  All of these functions return statement lists (even
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/9] Optimize min/max expressions

2014-07-29 Thread Petri Latvala
Continuing the tale of fixing
https://bugs.freedesktop.org/show_bug.cgi?id=76861

Instead of making an IR expression type for mid3, I implemented an
optimization pass that optimizes general min/max expression trees, by
dropping operands that are proven to never be selected as the final
result.

This optimization pass not only improves code generated from mid3(),
but also other sources of such min/max trees, such as clamp(), or
plain min() and max(). Commit message for patch 1/9 contains shader-db
instruction count results, repeated here:

total instructions in shared programs: 4293603 -> 4293575 (-0.00%)
instructions in affected programs: 1188 -> 1160 (-2.36%)
GAINED:0
LOST:  0

Serious Sam 3 and Guacamelee were improved, Dungeon Defenders was made
a bit worse. There was a max(foo, 0.0) in the shader where the 0.0
constant was dropped, which then resulted in shorter IR code but
longer shader binary.

No changes in piglit results. Patch 9 adds unit tests for the pass.

Petri Latvala (9):
  glsl: Optimize min/max expression trees
  glsl: Fix directory handling in optimization-test
  glsl: Refactor the python test case generator
  glsl: Make compare_ir sort expression operands for commutative
operations
  glsl: Generalize assignment and declaration IR sexp creation functions
  glsl: Add const_vec4 IR sexp creation function to
test_case_generator.py
  glsl: Add min and max IR sexp creation functions to
test_case_generator.py
  glsl: Add support for do_minmax_prune in test_optpass
  glsl: Add tests for minmax prune

 src/glsl/Makefile.sources   |   1 +
 src/glsl/glsl_parser_extras.cpp |   1 +
 src/glsl/ir_optimization.h  |   1 +
 src/glsl/opt_minmax.cpp | 395 
 src/glsl/test_optpass.cpp   |   2 +
 src/glsl/tests/compare_ir   |   4 +-
 src/glsl/tests/lower_jumps/create_test_cases.py | 336 +++-
 src/glsl/tests/minmax/.gitignore|   3 +
 src/glsl/tests/minmax/create_test_cases.py  | 151 +
 src/glsl/tests/optimization-test|   2 +-
 src/glsl/tests/sexps.py |  37 +++
 src/glsl/tests/test_case_generator.py   | 325 +++
 12 files changed, 960 insertions(+), 298 deletions(-)
 create mode 100644 src/glsl/opt_minmax.cpp
 create mode 100644 src/glsl/tests/minmax/.gitignore
 create mode 100644 src/glsl/tests/minmax/create_test_cases.py
 create mode 100644 src/glsl/tests/test_case_generator.py

-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/9] glsl: Fix directory handling in optimization-test

2014-07-29 Thread Petri Latvala
optimization-test loops over directories that contain a file named
create_test_cases.py. For each occurrence, it enters the directory
that contains the file and executes it, and is then supposed to return
to the previous directory.

The name .. was hardcoded in the script, and should be
../.. instead. Hardcoding that is finicky too, so wrap the directory change
and script execution in a subshell instead.

In case of errors executing create_test_cases.py, exit with status 1
to make the test run communicate failure properly.

Signed-off-by: Petri Latvala 
---
 src/glsl/tests/optimization-test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/tests/optimization-test b/src/glsl/tests/optimization-test
index 26a51be..12ecc5e 100755
--- a/src/glsl/tests/optimization-test
+++ b/src/glsl/tests/optimization-test
@@ -12,7 +12,7 @@ pass=0
 echo "==   Generating tests  =="
 for dir in tests/*/; do
 if [ -e "${dir}create_test_cases.py" ]; then
-cd $dir; $PYTHON2 create_test_cases.py; cd ..
+(cd $dir; $PYTHON2 create_test_cases.py) || exit 1
 fi
 echo "$dir"
 done
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 8/9] glsl: Add support for do_minmax_prune in test_optpass

2014-07-29 Thread Petri Latvala
Signed-off-by: Petri Latvala 
---
 src/glsl/test_optpass.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/glsl/test_optpass.cpp b/src/glsl/test_optpass.cpp
index e4878bf..ef62752 100644
--- a/src/glsl/test_optpass.cpp
+++ b/src/glsl/test_optpass.cpp
@@ -131,6 +131,8 @@ do_optimization(struct exec_list *ir, const char 
*optimization,
   return lower_quadop_vector(ir, int_0 != 0);
} else if (strcmp(optimization, "optimize_redundant_jumps") == 0) {
   return optimize_redundant_jumps(ir);
+   } else if (strcmp(optimization, "do_minmax_prune") == 0) {
+  return do_minmax_prune(ir);
} else {
   printf("Unrecognized optimization %s\n", optimization);
   exit(EXIT_FAILURE);
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/9] glsl: Generalize assignment and declaration IR sexp creation functions

2014-07-29 Thread Petri Latvala
Already-existing and used declare_temp() and assign_x() functions can
then be implemented in terms of the general functions.

Signed-off-by: Petri Latvala 
---
 src/glsl/tests/test_case_generator.py | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/glsl/tests/test_case_generator.py 
b/src/glsl/tests/test_case_generator.py
index d6754fc8..5df8420 100644
--- a/src/glsl/tests/test_case_generator.py
+++ b/src/glsl/tests/test_case_generator.py
@@ -127,19 +127,32 @@ def loop(statements):
 check_sexp(statements)
 return [['loop', statements]]
 
+def declare(var_type, var_name, decltype):
+"""Create a declaration of the form
+
+(declare ()  )
+"""
+return [['declare', [decltype], var_type, var_name]]
+
 def declare_temp(var_type, var_name):
 """Create a declaration of the form
 
 (declare (temporary)   to the variable . The
+assignment uses  (a string) as the write mask.
+"""
+check_sexp(value)
+return [['assign', [mask], ['var_ref', var_name], value]]
 
 def assign_x(var_name, value):
 """Create a statement that assigns  to the variable
 .  The assignment uses the mask (x).
 """
-check_sexp(value)
-return [['assign', ['x'], ['var_ref', var_name], value]]
+return assign(var_name, 'x', value)
 
 def complex_if(var_prefix, statements):
 """Create a statement of the form
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/9] glsl: Add const_vec4 IR sexp creation function to test_case_generator.py

2014-07-29 Thread Petri Latvala
Signed-off-by: Petri Latvala 
---
 src/glsl/tests/test_case_generator.py | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/glsl/tests/test_case_generator.py 
b/src/glsl/tests/test_case_generator.py
index 5df8420..6633702 100644
--- a/src/glsl/tests/test_case_generator.py
+++ b/src/glsl/tests/test_case_generator.py
@@ -78,6 +78,10 @@ def const_bool(value):
 """
 return ['constant', 'bool', ['{0}'.format(1 if value else 0)]]
 
+def const_vec4(val1, val2, val3, val4):
+"""Create an expression representing the given values as a vec4."""
+return ['constant', 'vec4', map(lambda x: '{0:.6f}'.format(x), [val1, 
val2, val3, val4])]
+
 def gt_zero(var_name):
 """Create Construct the expression var_name > 0"""
 return ['expression', 'bool', '>', ['var_ref', var_name], const_float(0)]
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/14] glsl: Implement saturate as ir_binop_saturate

2014-06-24 Thread Petri Latvala

On 06/24/2014 08:27 AM, Abdiel Janulgue wrote:

Now that we have the ir_binop_saturate implemented as a single
instruction, generate the correct simplified expression.



s/binop_saturate/unop_saturate/  in subject and commit text.


--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/13] mesa: Remove unused functions from perfomance query code.

2014-06-16 Thread Petri Latvala

On 06/15/2014 08:17 PM, Matt Turner wrote:

Perhaps useful for debugging? Never used otherwise. Added by commit
8cf5bdad.
---
  src/mesa/main/performance_monitor.c | 13 -
  1 file changed, 13 deletions(-)

diff --git a/src/mesa/main/performance_monitor.c 
b/src/mesa/main/performance_monitor.c
index 9d1a6b4..c26eda4 100644
--- a/src/mesa/main/performance_monitor.c
+++ b/src/mesa/main/performance_monitor.c
@@ -164,19 +164,6 @@ counterid_to_index(GLuint counterid)
 return counterid - 1;
  }
  
-static inline GLuint

-index_to_counterid(GLuint index)
-{
-   return index + 1;
-}
-
-static inline bool
-counterid_valid(const struct gl_perf_monitor_group *group_obj,
-GLuint counterid)
-{
-   return get_counter(group_obj, counterid_to_index(counterid)) != NULL;
-}
-
  
/*/
  
  void GLAPIENTRY


Oh, bugger. Those functions were used at some point, but their lack of 
use after refactoring never came up.


Reviewed-by: Petri Latvala 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] glsl: parser changes for GL_ARB_explicit_uniform_location

2014-06-12 Thread Petri Latvala

Reviewed-by: Petri Latvala 


On 06/12/2014 10:46 AM, Tapani Pälli wrote:

Patch adds a preprocessor define for the extension and stores explicit
location data for uniforms during AST->HIR conversion. It also sets
layout token to be available when having the extension in place.

v2: change parser check to require GLSL 330 or enabling
 GL_ARB_explicit_attrib_location (Ian)
v3: fix the check and comment in AST->HIR (Petri)

Signed-off-by: Tapani Pälli 
---
  src/glsl/ast_to_hir.cpp   | 35 +++
  src/glsl/glcpp/glcpp-parse.y  |  3 +++
  src/glsl/glsl_lexer.ll|  1 +
  src/glsl/glsl_parser_extras.h | 15 +++
  4 files changed, 54 insertions(+)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 140bb74..9567f9b 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2181,6 +2181,41 @@ validate_explicit_location(const struct 
ast_type_qualifier *qual,
  {
 bool fail = false;
  
+   /* Checks for GL_ARB_explicit_uniform_location. */

+   if (qual->flags.q.uniform) {
+  if (!state->check_explicit_uniform_location_allowed(loc, var))
+ return;
+
+  const struct gl_context *const ctx = state->ctx;
+  unsigned max_loc = qual->location + var->type->uniform_locations() - 1;
+
+  /* ARB_explicit_uniform_location specification states:
+   *
+   * "The explicitly defined locations and the generated locations
+   * must be in the range of 0 to MAX_UNIFORM_LOCATIONS minus one."
+   *
+   * "Valid locations for default-block uniform variable locations
+   * are in the range of 0 to the implementation-defined maximum
+   * number of uniform locations."
+   */
+  if (qual->location < 0) {
+ _mesa_glsl_error(loc, state,
+  "explicit location < 0 for uniform %s", var->name);
+ return;
+  }
+
+  if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) {
+ _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s "
+  ">= MAX_UNIFORM_LOCATIONS (%u)", var->name,
+  ctx->Const.MaxUserAssignableUniformLocations);
+ return;
+  }
+
+  var->data.explicit_location = true;
+  var->data.location = qual->location;
+  return;
+   }
+
 /* Between GL_ARB_explicit_attrib_location an
  * GL_ARB_separate_shader_objects, the inputs and outputs of any shader
  * stage can be assigned explicit locations.  The checking here associates
diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 9887583..dacb954 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2089,6 +2089,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t 
*parser, intmax_t versio
  if (extensions->ARB_explicit_attrib_location)
 add_builtin_define(parser, "GL_ARB_explicit_attrib_location", 
1);
  
+	  if (extensions->ARB_explicit_uniform_location)

+add_builtin_define(parser, "GL_ARB_explicit_uniform_location", 
1);
+
  if (extensions->ARB_shader_texture_lod)
 add_builtin_define(parser, "GL_ARB_shader_texture_lod", 1);
  
diff --git a/src/glsl/glsl_lexer.ll b/src/glsl/glsl_lexer.ll

index 6c3f9b6..db7b1d1 100644
--- a/src/glsl/glsl_lexer.ll
+++ b/src/glsl/glsl_lexer.ll
@@ -396,6 +396,7 @@ layout  {
  || yyextra->AMD_conservative_depth_enable
  || yyextra->ARB_conservative_depth_enable
  || yyextra->ARB_explicit_attrib_location_enable
+ || yyextra->ARB_explicit_uniform_location_enable
|| yyextra->has_separate_shader_objects()
  || yyextra->ARB_uniform_buffer_object_enable
  || yyextra->ARB_fragment_coord_conventions_enable
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index e26460e..b73f816 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -151,6 +151,21 @@ struct _mesa_glsl_parse_state {
return true;
 }
  
+   bool check_explicit_uniform_location_allowed(YYLTYPE *locp,

+const ir_variable *var)
+   {
+  if (!this->has_explicit_attrib_location() ||
+  !this->ARB_explicit_uniform_location_enable) {
+ _mesa_glsl_error(locp, this,
+  "uniform explicit location requires "
+  "GL_ARB_explicit_uniform_location and either "
+  "GL_ARB_explicit_attrib_location or GLSL 330.");
+ return false;
+  }
+
+  return true;
+   }
+
 bool has_explicit_attrib_l

Re: [Mesa-dev] [PATCH 10/10] docs: update ARB_explicit_uniform_location status

2014-06-12 Thread Petri Latvala

Reviewed-by: Petri Latvala 

On 04/09/2014 12:56 PM, Tapani Pälli wrote:

Signed-off-by: Tapani Pälli 
---
  docs/GL3.txt | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index bf51e3a..245a045 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -148,7 +148,7 @@ GL 4.3:
GL_ARB_compute_shaderstarted (Paul Berry)
GL_ARB_copy_imagenot started
GL_KHR_debug DONE (all drivers)
-  GL_ARB_explicit_uniform_location not started
+  GL_ARB_explicit_uniform_location DONE (all drivers that 
support GLSL)
GL_ARB_fragment_layer_viewport   not started
GL_ARB_framebuffer_no_attachmentsnot started
GL_ARB_internalformat_query2 not started


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] Enable GL_ARB_explicit_uniform_location in the drivers.

2014-06-12 Thread Petri Latvala

Reviewed-by: Petri Latvala 

On 06/09/2014 01:06 PM, Tapani Pälli wrote:

v2: enable also for i915 (Ian)

Signed-off-by: Tapani Pälli 
---
  src/mesa/drivers/dri/i915/intel_extensions.c | 1 +
  src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
  src/mesa/state_tracker/st_extensions.c   | 1 +
  3 files changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i915/intel_extensions.c 
b/src/mesa/drivers/dri/i915/intel_extensions.c
index 76f608e..de716a7 100644
--- a/src/mesa/drivers/dri/i915/intel_extensions.c
+++ b/src/mesa/drivers/dri/i915/intel_extensions.c
@@ -46,6 +46,7 @@ intelInitExtensions(struct gl_context *ctx)
  
 ctx->Extensions.ARB_draw_elements_base_vertex = true;

 ctx->Extensions.ARB_explicit_attrib_location = true;
+   ctx->Extensions.ARB_explicit_uniform_location = true;
 ctx->Extensions.ARB_framebuffer_object = true;
 ctx->Extensions.ARB_internalformat_query = true;
 ctx->Extensions.ARB_map_buffer_range = true;
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 39d0ab5..9babe64 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -170,6 +170,7 @@ intelInitExtensions(struct gl_context *ctx)
 ctx->Extensions.ARB_draw_instanced = true;
 ctx->Extensions.ARB_ES2_compatibility = true;
 ctx->Extensions.ARB_explicit_attrib_location = true;
+   ctx->Extensions.ARB_explicit_uniform_location = true;
 ctx->Extensions.ARB_fragment_coord_conventions = true;
 ctx->Extensions.ARB_fragment_program = true;
 ctx->Extensions.ARB_fragment_program_shadow = true;
diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 12ba82d..e9a74c5 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -543,6 +543,7 @@ void st_init_extensions(struct st_context *st)
 ctx->Extensions.ARB_ES2_compatibility = GL_TRUE;
 ctx->Extensions.ARB_draw_elements_base_vertex = GL_TRUE;
 ctx->Extensions.ARB_explicit_attrib_location = GL_TRUE;
+   ctx->Extensions.ARB_explicit_uniform_location = GL_TRUE;
 ctx->Extensions.ARB_fragment_coord_conventions = GL_TRUE;
 ctx->Extensions.ARB_fragment_program = GL_TRUE;
 ctx->Extensions.ARB_fragment_shader = GL_TRUE;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] glsl: parser changes for GL_ARB_explicit_uniform_location

2014-06-11 Thread Petri Latvala


On 06/05/2014 08:08 AM, Tapani Pälli wrote:

Patch adds a preprocessor define for the extension and stores explicit
location data for uniforms during AST->HIR conversion. It also sets
layout token to be available when having the extension in place.

v2: change parser check to require GLSL 330 or enabling
 GL_ARB_explicit_attrib_location (Ian)

Signed-off-by: Tapani Pälli 
---
  src/glsl/ast_to_hir.cpp   | 36 
  src/glsl/glcpp/glcpp-parse.y  |  3 +++
  src/glsl/glsl_lexer.ll|  1 +
  src/glsl/glsl_parser_extras.h | 15 +++
  4 files changed, 55 insertions(+)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index d1c77f1..04452b8 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2182,6 +2182,42 @@ validate_explicit_location(const struct 
ast_type_qualifier *qual,
  {
 bool fail = false;
  
+   /* Checks for GL_ARB_explicit_uniform_location. */

+   if (qual->flags.q.uniform) {
+  if (!state->check_explicit_uniform_location_allowed(loc, var))
+ return;
+
+  const struct gl_context *const ctx = state->ctx;
+  unsigned max_loc = qual->location + var->type->component_slots() - 1;


var->type->uniform_locations() ?



+
+  /* ARB_explicit_uniform_location specification states:
+   *
+   * "The explicitly defined locations and the generated locations
+   * must be in the range of 0 to MAX_UNIFORM_LOCATIONS minus one."
+   *
+   * "Valid locations for default-block uniform variable locations
+   * are in the range of 0 to the implementation-defined maximum
+   * number of uniform locations."
+   */
+  if (qual->location < 0) {
+ _mesa_glsl_error(loc, state,
+  "explicit location < 0 for uniform %s", var->name);
+ return;
+  }
+
+  if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) {
+ _mesa_glsl_error(loc, state, "location qualifier for uniform %s "
+  ">= MAX_UNIFORM_LOCATIONS (%u)",
+  var->name,
+  ctx->Const.MaxUserAssignableUniformLocations);
+ return;
+  }


This error message might be too confusing.

If MAX_UNIFORM_LOCATIONS were 6:

uniform layout(location=3) float x[10];

Error: Location qualifier for uniform x >= MAX_UNIFORM_LOCATIONS(6)

"But 3 is not >= 6!" says the confused developer.

Maybe "location qualifier or automatically assigned location" or something?



+
+  var->data.explicit_location = true;
+  var->data.location = qual->location;
+  return;
+   }
+
 /* Between GL_ARB_explicit_attrib_location an
  * GL_ARB_separate_shader_objects, the inputs and outputs of any shader
  * stage can be assigned explicit locations.  The checking here associates
diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 9887583..dacb954 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2089,6 +2089,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t 
*parser, intmax_t versio
  if (extensions->ARB_explicit_attrib_location)
 add_builtin_define(parser, "GL_ARB_explicit_attrib_location", 
1);
  
+	  if (extensions->ARB_explicit_uniform_location)

+add_builtin_define(parser, "GL_ARB_explicit_uniform_location", 
1);
+
  if (extensions->ARB_shader_texture_lod)
 add_builtin_define(parser, "GL_ARB_shader_texture_lod", 1);
  
diff --git a/src/glsl/glsl_lexer.ll b/src/glsl/glsl_lexer.ll

index 6c3f9b6..db7b1d1 100644
--- a/src/glsl/glsl_lexer.ll
+++ b/src/glsl/glsl_lexer.ll
@@ -396,6 +396,7 @@ layout  {
  || yyextra->AMD_conservative_depth_enable
  || yyextra->ARB_conservative_depth_enable
  || yyextra->ARB_explicit_attrib_location_enable
+ || yyextra->ARB_explicit_uniform_location_enable
|| yyextra->has_separate_shader_objects()
  || yyextra->ARB_uniform_buffer_object_enable
  || yyextra->ARB_fragment_coord_conventions_enable
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index e26460e..b73f816 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -151,6 +151,21 @@ struct _mesa_glsl_parse_state {
return true;
 }
  
+   bool check_explicit_uniform_location_allowed(YYLTYPE *locp,

+const ir_variable *var)
+   {
+  if (!this->has_explicit_attrib_location() ||
+  !this->ARB_explicit_uniform_location_enable) {
+ _mesa_glsl_error(locp, this,
+  "uniform explicit location requires "
+  "GL_ARB_explicit_uniform_location and either "
+  "GL_ARB_explicit_attrib_location or GLSL 330.");
+  

Re: [Mesa-dev] [PATCH 7/9] glsl: Make foreach macros usable from C by adding struct keyword.

2014-06-11 Thread Petri Latvala

On 06/10/2014 10:20 PM, Ian Romanick wrote:

I did a little more research.  Looking at the commit history, it must
just be an MSVC thing:

commit 78d3cfb5b4fe1dbe052f87458e62cc2be2ecf171
Author: Brian Paul 
Date:   Sun Nov 4 16:43:44 2012 -0700

 glsl: remove incorrect 'struct' keyword

 ir_variable is a class, not a struct.  Fixes an MSVC warning.

 Reviewed-by: Jose Fonseca 

So, I think it is just class vs struct (and not struct vs nothing) that
causes the warning.




Mixing class and struct is fine in the Itanium ABI (the ABI used by gcc 
on Linux), they're both mangled the same. On Windows, the mangling is 
different and mixing class and struct will actually cause problems. 
Usually the problem is a failure to link.


For the kicks of it, clang++ has -Wmismatched-tags (part of -Wall) that 
will warn about mixing.


(As far as the C++ standard is concerned, `struct' class-key, `class' 
class-key and `union' class-key create "classes")


In a nutshell, for Windows builds, struct and class must match. For 
testing on Linux, CXX=clang++ CXXFLAGS=-Werror=mismatched-tags.


--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 19/20] mesa/main: Add missing null check in _mesa_CreatePerfQueryINTEL()

2014-05-15 Thread Petri Latvala

Reviewed-by: Petri Latvala 

On 05/14/2014 08:56 PM, Juha-Pekka Heikkila wrote:

Signed-off-by: Juha-Pekka Heikkila 
---
  src/mesa/main/performance_monitor.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/src/mesa/main/performance_monitor.c 
b/src/mesa/main/performance_monitor.c
index 21b9423..9d1a6b4 100644
--- a/src/mesa/main/performance_monitor.c
+++ b/src/mesa/main/performance_monitor.c
@@ -1036,6 +1036,11 @@ _mesa_CreatePerfQueryINTEL(GLuint queryId, GLuint 
*queryHandle)
 }
  
 m = new_performance_monitor(ctx, first);

+   if (m == NULL) {
+  _mesa_error_no_memory(__func__);
+  return;
+   }
+
 _mesa_HashInsert(ctx->PerfMonitor.Monitors, first, m);
 *queryHandle = first;
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/21] glsl: protect locale_t with a mutex

2014-05-06 Thread Petri Latvala

On 05/06/2014 01:04 AM, Kenneth Graunke wrote:

On 05/05/2014 10:01 AM, Ian Romanick wrote:


The problem I see with that is we don't know what order constructors on
global objects will occur.  Are we sure the contructor will be invoked
before we need the locale now and in the future?

These functions are used during lexing to read floating point
literals...without breaking on locales where the decimal point is a comma.

All constructors have definitely been run by that point, and I can't
imagine we'll ever need to call locale-independent strtof/strtod from
other constructors.




The order of constructors in a single TU is well defined. The usual 
pattern for this kind of initialization is to have the initializer 
object in the header as static:



In strtod.h:

struct loc_initializer
{
  _Initializer()
  {
if (!loc) {
  loc = initialize_it();
}
   }
};

static loc_initializer loc_init;


Any global object constructor that might use loc will have to #include 
strtod.h, therefore loc_init will appear first in that TU, and will 
initialize first.


A real-world example would be std::_Init that initializes std::cout and 
its pals.



--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Add an ir_triop_mid3 expression and lower it to mins and maxs.

2014-04-30 Thread Petri Latvala

On 04/29/2014 09:57 PM, Matt Turner wrote:

On Tue, Apr 29, 2014 at 6:01 AM, Petri Latvala  wrote:

For the record, tested this with the following shader:



Cool. Please submit this as a piglit test.


Sent to piglit mailing list, with accompanying tests for min3 and max3.

Wouldn't it be simpler to detect constant arguments in opt_algebraic 
and do the optimization there, and just perform the standard lowering 
here? It seems cleaner to me. I don't think we generate different code 
based on the arguments in any other lowering pass. 


I was thinking about that, but ended up optimizing on lowering. My 
reasoning was that if mid3(x, 1, 3) was transformed to min(max(x, 1), 3) 
in opt_algebraic, backends with theoretical native support for mid3 
would then need to recognize that and transform it back to a mid3.


(Are there even any GPUs that can do mid3 directly? AMD?)

Of course, it can be said that it's not a regression as mid3 doesn't get 
to backends as mid3 before this patch either.


I'll change the patch to do this optimization in opt_algebraic.


--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Add an ir_triop_mid3 expression and lower it to mins and maxs.

2014-04-29 Thread Petri Latvala
Just noticed that this obviously conflicts with Ilia Mirkin's patches 
for lowering of carry/borrow. I'll rebase and resend once those land.



--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] Add an ir_triop_mid3 expression and lower it to mins and maxs.

2014-04-29 Thread Petri Latvala
If mid3 is called with two constants, the resulting IR was two maxes and three
mins, when one max and one min would have sufficed. Make mid3() produce an
ir_expression with ir_triop_mid3 (new ir_expression operation) and lower it in
a lower_instructions pass to the needed amount of mins and maxs.

Tested on i965/Haswell.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76861
Signed-off-by: Petri Latvala 
---

For the record, tested this with the following shader:


#extension GL_AMD_shader_trinary_minmax : require

uniform float zero;
uniform float one;
uniform float middle;

float test_all_constants()
{
return mid3(0.0, 1.0, 0.5);
}

float test_two_constants()
{
return mid3(0.5, one, 0.0);
}

float test_one_constant()
{
return mid3(one, zero, 0.5);
}

float test_no_constants()
{
return mid3(middle, one, zero);
}

void main()
{
float r = test_all_constants();
float g = test_two_constants();
float b = test_one_constant();
float a = test_no_constants();

gl_FragColor = vec4(r, g, b, a);
}


total instructions in shared programs: 61 -> 57 (-6.56%)
instructions in affected programs: 56 -> 52 (-7.14%)


Existing piglit tests didn't stress the two-constants case at all so
no results from there. Other than all tests passing, naturally.



 src/glsl/builtin_functions.cpp |   2 +-
 src/glsl/ir.cpp|   2 +
 src/glsl/ir.h  |   9 +-
 src/glsl/ir_constant_expression.cpp|  22 
 src/glsl/ir_optimization.h |   1 +
 src/glsl/ir_validate.cpp   |   6 ++
 src/glsl/lower_instructions.cpp| 112 +
 .../dri/i965/brw_fs_channel_expressions.cpp|   1 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   |   6 ++
 src/mesa/drivers/dri/i965/brw_shader.cpp   |   3 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |   3 +
 src/mesa/main/macros.h |   3 +
 src/mesa/program/ir_to_mesa.cpp|   5 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   2 +
 14 files changed, 174 insertions(+), 3 deletions(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 3991f9d..12bbfe0 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -4260,7 +4260,7 @@ builtin_builder::_mid3(const glsl_type *type)
ir_variable *z = in_var(type, "z");
MAKE_SIG(type, shader_trinary_minmax, 3, x, y, z);
 
-   ir_expression *mid3 = max2(min2(x, y), max2(min2(x, z), min2(y, z)));
+   ir_expression *mid3 = expr(ir_triop_mid3, x, y, z);
body.emit(ret(mid3));
 
return sig;
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 1a18b47..bc585d6 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -436,6 +436,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, 
ir_rvalue *op1,
case ir_triop_lrp:
case ir_triop_bitfield_extract:
case ir_triop_vector_insert:
+   case ir_triop_mid3:
   this->type = op0->type;
   break;
 
@@ -566,6 +567,7 @@ static const char *const operator_strs[] = {
"bfi",
"bitfield_extract",
"vector_insert",
+   "mid3",
"bitfield_insert",
"vector",
 };
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 6c7c60a..399a4ce 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1390,9 +1390,16 @@ enum ir_expression_operation {
ir_triop_vector_insert,
 
/**
+* \name Yield the per-component median of three values, part of 
AMD_shader_trinary_minmax.
+*/
+   /*@{*/
+   ir_triop_mid3,
+   /*@}*/
+
+   /**
 * A sentinel marking the last of the ternary operations.
 */
-   ir_last_triop = ir_triop_vector_insert,
+   ir_last_triop = ir_triop_mid3,
 
ir_quadop_bitfield_insert,
 
diff --git a/src/glsl/ir_constant_expression.cpp 
b/src/glsl/ir_constant_expression.cpp
index 8afe8f7..1d4c0e5 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -1575,6 +1575,28 @@ ir_expression::constant_expression_value(struct 
hash_table *variable_context)
   break;
}
 
+   case ir_triop_mid3: {
+  assert(op[0]->type == op[1]->type);
+  assert(op[0]->type == op[2]->type);
+
+  for (unsigned c = 0; c < components; c++) {
+switch (op[0]->type->base_type) {
+case GLSL_TYPE_UINT:
+   data.u[c] = MID3(op[0]->value.u[c], op[1]->value.u[c], 
op[2]->value.u[c]);
+   break;
+case GLSL_TYPE_INT:
+   data.i[c] = MID3(op[0]->value.i[c], op[1]->value.i[c], 
op[2]->value.i[c]);
+   break;
+case GLSL_TYPE_FLOAT:
+   data.f[c] = MID3(op[0]->value.f[c], op[1]->value.f[c], 
op[2]->value.f[c]);
+   break;
+default:
+   assert(0);
+ 

[Mesa-dev] [PATCH 5/6] i965: Enable INTEL_performance_query for Gen5+.

2014-04-23 Thread Petri Latvala
Signed-off-by: Petri Latvala 
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 892a048..d6e1494 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -311,8 +311,10 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_stencil_texturing = true;
}
 
-   if (brw->gen == 5 || can_write_oacontrol(brw))
+   if (brw->gen == 5 || can_write_oacontrol(brw)) {
   ctx->Extensions.AMD_performance_monitor = true;
+  ctx->Extensions.INTEL_performance_query = true;
+   }
 
if (ctx->API == API_OPENGL_CORE)
   ctx->Extensions.ARB_base_instance = true;
-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] mesa: Add INTEL_performance_query enums to tests/enum_strings.cpp

2014-04-23 Thread Petri Latvala
Signed-off-by: Petri Latvala 
Reviewed-by: Ian Romanick 
---
 src/mesa/main/tests/enum_strings.cpp | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/mesa/main/tests/enum_strings.cpp 
b/src/mesa/main/tests/enum_strings.cpp
index 3795700..d16eb36 100644
--- a/src/mesa/main/tests/enum_strings.cpp
+++ b/src/mesa/main/tests/enum_strings.cpp
@@ -807,6 +807,9 @@ const struct enum_info everything[] = {
{ 0x83F1, "GL_COMPRESSED_RGBA_S3TC_DXT1_EXT" },
{ 0x83F2, "GL_COMPRESSED_RGBA_S3TC_DXT3_ANGLE" },
{ 0x83F3, "GL_COMPRESSED_RGBA_S3TC_DXT5_ANGLE" },
+   { 0x83F9, "GL_PERFQUERY_DONOT_FLUSH_INTEL" },
+   { 0x83FA, "GL_PERFQUERY_FLUSH_INTEL" },
+   { 0x83FB, "GL_PERFQUERY_WAIT_INTEL" },
{ 0x844D, "GL_NEAREST_CLIPMAP_NEAREST_SGIX" },
{ 0x844E, "GL_NEAREST_CLIPMAP_LINEAR_SGIX" },
{ 0x844F, "GL_LINEAR_CLIPMAP_NEAREST_SGIX" },
@@ -1843,6 +1846,21 @@ const struct enum_info everything[] = {
{ 0x9271, "GL_COMPRESSED_SIGNED_R11_EAC" },
{ 0x9272, "GL_COMPRESSED_RG11_EAC" },
{ 0x9273, "GL_COMPRESSED_SIGNED_RG11_EAC" },
+   { 0x94F0, "GL_PERFQUERY_COUNTER_EVENT_INTEL" },
+   { 0x94F1, "GL_PERFQUERY_COUNTER_DURATION_NORM_INTEL" },
+   { 0x94F2, "GL_PERFQUERY_COUNTER_DURATION_RAW_INTEL" },
+   { 0x94F3, "GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL" },
+   { 0x94F4, "GL_PERFQUERY_COUNTER_RAW_INTEL" },
+   { 0x94F5, "GL_PERFQUERY_COUNTER_TIMESTAMP_INTEL" },
+   { 0x94F8, "GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL" },
+   { 0x94F9, "GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL" },
+   { 0x94FA, "GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL" },
+   { 0x94FB, "GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL" },
+   { 0x94FC, "GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL" },
+   { 0x94FD, "GL_PERFQUERY_QUERY_NAME_LENGTH_MAX_INTEL" },
+   { 0x94FE, "GL_PERFQUERY_COUNTER_NAME_LENGTH_MAX_INTEL" },
+   { 0x94FF, "GL_PERFQUERY_COUNTER_DESC_LENGTH_MAX_INTEL" },
+   { 0x9500, "GL_PERFQUERY_GPA_EXTENDED_COUNTERS_INTEL" },
{ 0x19262, "GL_RASTER_POSITION_UNCLIPPED_IBM" },
{ 0, NULL }
 };
-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] docs: update 10.2 release notes

2014-04-23 Thread Petri Latvala
Signed-off-by: Petri Latvala 
---
 docs/relnotes/10.2.html | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/relnotes/10.2.html b/docs/relnotes/10.2.html
index d7d557b..473739c 100644
--- a/docs/relnotes/10.2.html
+++ b/docs/relnotes/10.2.html
@@ -47,6 +47,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_ARB_buffer_storage on i965, r300, r600, and radeonsi
 GL_ARB_stencil_texturing on i965/gen8+
 GL_ARB_texture_view on i965/gen7
+GL_INTEL_performance_query on i965/gen5+
 
 
 
-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/6] v3: Implement INTEL_performance_query

2014-04-23 Thread Petri Latvala
Third revision of the patch series. Changes:

- Rebased to current master
- Changes based on Ian's review
- Add the extension to 10.2 release notes

I didn't change patch 5/6 "Enable INTEL_performance_query for Gen5+"
along the review comments yet. It's true that currently drivers can
support both by just implementing the current driver hooks, but I'm
worried that the situation might change with the upcoming changes to
the driver functions to support semantic types and normalized
counters. I have no concrete examples that would make that happen, I
might just be too paranoid and wary.

Summarum, there's more changes for this extension incoming, and that
sort of cleanup can be part of it if so desired.

I will need someone to push this btw.

Petri Latvala (6):
  Regenerate gl_mangle.h.
  mesa: Add INTEL_performance_query enums to tests/enum_strings.cpp
  mesa: Add core support for the GL_INTEL_performance_query extension.
  mesa: Implement INTEL_performance_query.
  i965: Enable INTEL_performance_query for Gen5+.
  docs: update 10.2 release notes

 docs/relnotes/10.2.html|   1 +
 include/GL/gl_mangle.h | 371 ++-
 src/mapi/glapi/gen/INTEL_performance_query.xml |  93 
 src/mapi/glapi/gen/Makefile.am |   1 +
 src/mapi/glapi/gen/gl_API.xml  |   2 +
 src/mesa/drivers/dri/i965/intel_extensions.c   |   4 +-
 src/mesa/main/config.h |   8 +
 src/mesa/main/extensions.c |   1 +
 src/mesa/main/get.c|   1 +
 src/mesa/main/get_hash_params.py   |   6 +
 src/mesa/main/mtypes.h |   1 +
 src/mesa/main/performance_monitor.c| 614 +
 src/mesa/main/performance_monitor.h|  43 +-
 src/mesa/main/tests/dispatch_sanity.cpp|  24 +
 src/mesa/main/tests/enum_strings.cpp   |  18 +
 15 files changed, 1180 insertions(+), 8 deletions(-)
 create mode 100644 src/mapi/glapi/gen/INTEL_performance_query.xml

-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] mesa: Implement INTEL_performance_query.

2014-04-23 Thread Petri Latvala
Using the existing driver hooks made for AMD_performance_monitor, implement
INTEL_performance_query functions.

v2: Whitespace changes.
v3: Whitespace changes, add a _mesa_warning()

Signed-off-by: Petri Latvala 
Reviewed-by: Ian Romanick 
---
 src/mesa/main/performance_monitor.c | 487 
 1 file changed, 441 insertions(+), 46 deletions(-)

diff --git a/src/mesa/main/performance_monitor.c 
b/src/mesa/main/performance_monitor.c
index 597f633..21b9423 100644
--- a/src/mesa/main/performance_monitor.c
+++ b/src/mesa/main/performance_monitor.c
@@ -137,6 +137,46 @@ get_counter(const struct gl_perf_monitor_group *group_obj, 
GLuint id)
return &group_obj->Counters[id];
 }
 
+/* For INTEL_performance_query, query id 0 is reserved to be invalid. We use
+ * index to Groups array + 1 as the query id. Same applies to counter id.
+ */
+static inline GLuint
+queryid_to_index(GLuint queryid)
+{
+   return queryid - 1;
+}
+
+static inline GLuint
+index_to_queryid(GLuint index)
+{
+   return index + 1;
+}
+
+static inline bool
+queryid_valid(const struct gl_context *ctx, GLuint queryid)
+{
+   return get_group(ctx, queryid_to_index(queryid)) != NULL;
+}
+
+static inline GLuint
+counterid_to_index(GLuint counterid)
+{
+   return counterid - 1;
+}
+
+static inline GLuint
+index_to_counterid(GLuint index)
+{
+   return index + 1;
+}
+
+static inline bool
+counterid_valid(const struct gl_perf_monitor_group *group_obj,
+GLuint counterid)
+{
+   return get_counter(group_obj, counterid_to_index(counterid)) != NULL;
+}
+
 /*/
 
 void GLAPIENTRY
@@ -644,6 +684,7 @@ extern void GLAPIENTRY
 _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
 {
GET_CURRENT_CONTEXT(ctx);
+   unsigned numGroups;
 
/* The GL_INTEL_performance_query spec says:
 *
@@ -655,16 +696,22 @@ _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
   return;
}
 
+   numGroups = ctx->PerfMonitor.NumGroups;
+
/* The GL_INTEL_performance_query spec says:
 *
 *"If the given hardware platform doesn't support any performance
 *queries, then the value of 0 is returned and INVALID_OPERATION error
 *is raised."
 */
+   if (numGroups == 0) {
+  *queryId = 0;
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glGetFirstPerfQueryIdINTEL(no queries supported)");
+  return;
+   }
 
-   *queryId = 0;
-   _mesa_error(ctx, GL_INVALID_OPERATION,
-   "glGetFirstPerfQueryIdINTEL(no queries supported)");
+   *queryId = index_to_queryid(0);
 }
 
 extern void GLAPIENTRY
@@ -674,40 +721,66 @@ _mesa_GetNextPerfQueryIdINTEL(GLuint queryId, GLuint 
*nextQueryId)
 
/* The GL_INTEL_performance_query spec says:
 *
-*"If nextQueryId pointer is equal to 0, an INVALID_VALUE error is
-*generated."
+*"The result is passed in location pointed by nextQueryId. If query
+*identified by queryId is the last query available the value of 0 is
+*returned. If the specified performance query identifier is invalid
+*then INVALID_VALUE error is generated. If nextQueryId pointer is
+*equal to 0, an INVALID_VALUE error is generated.  Whenever error is
+*generated, the value of 0 is returned."
 */
+
if (!nextQueryId) {
   _mesa_error(ctx, GL_INVALID_VALUE,
   "glGetNextPerfQueryIdINTEL(nextQueryId == NULL)");
   return;
}
 
-   /* The GL_INTEL_performance_query spec says:
-*
-*"If the specified performance query identifier is invalid then
-*INVALID_VALUE error is generated. Whenever error is generated, the
-*value of 0 is returned."
-*
-* No queries are supported, so all queries are invalid.
-*/
-   *nextQueryId = 0;
-   _mesa_error(ctx, GL_INVALID_VALUE,
-   "glGetNextPerfQueryIdINTEL(invalid query)");
+   if (!queryid_valid(ctx, queryId)) {
+  *nextQueryId = 0;
+  _mesa_error(ctx, GL_INVALID_VALUE,
+  "glGetNextPerfQueryIdINTEL(invalid query)");
+  return;
+   }
+
+   ++queryId;
+
+   if (!queryid_valid(ctx, queryId)) {
+  *nextQueryId = 0;
+   } else {
+  *nextQueryId = queryId;
+   }
 }
 
 extern void GLAPIENTRY
 _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
 {
GET_CURRENT_CONTEXT(ctx);
+   unsigned i;
 
/* The GL_INTEL_performance_query spec says:
 *
 *"If queryName does not reference a valid query name, an INVALID_VALUE
 *error is generated."
-*
-* No queries are supported, so all query names are invalid.
 */
+   if (!queryName) {
+  _mesa_error(ctx, GL_INVALID_VALUE,
+  "glGetPerfQueryIdByNameINTEL(queryName == NULL)");
+  return;
+   }
+
+   /* The specification does not state that this produ

[Mesa-dev] [PATCH 3/6] mesa: Add core support for the GL_INTEL_performance_query extension.

2014-04-23 Thread Petri Latvala
Like AMD_performance_monitor, this extension provides an interface for
applications (and OpenGL-based tools) to access GPU performance
counters. Since the exact performance counters available vary between
vendors and hardware generations, the extension provides an API the
application can use to get the names, types, and minimum/maximum
values of all available counters.

Applications create performance queries based on available query
types, and begin/end measurement collection. Multiple queries can be
measuring simultaneously.

v2: Whitespace changes
v3: src/mapi/glapi/gen/gl_API.xml: Also expose the functions to GLES2.
v4: Whitespace changes, static_dispatch="false" for all functions, fix
dispatch_sanity test for GLES2 functions

Signed-off-by: Petri Latvala 
Reviewed-by: Ian Romanick 
---
 src/mapi/glapi/gen/INTEL_performance_query.xml |  93 +++
 src/mapi/glapi/gen/Makefile.am |   1 +
 src/mapi/glapi/gen/gl_API.xml  |   2 +
 src/mesa/main/config.h |   8 +
 src/mesa/main/extensions.c |   1 +
 src/mesa/main/get.c|   1 +
 src/mesa/main/get_hash_params.py   |   6 +
 src/mesa/main/mtypes.h |   1 +
 src/mesa/main/performance_monitor.c| 219 +
 src/mesa/main/performance_monitor.h|  43 -
 src/mesa/main/tests/dispatch_sanity.cpp|  24 +++
 11 files changed, 398 insertions(+), 1 deletion(-)
 create mode 100644 src/mapi/glapi/gen/INTEL_performance_query.xml

diff --git a/src/mapi/glapi/gen/INTEL_performance_query.xml 
b/src/mapi/glapi/gen/INTEL_performance_query.xml
new file mode 100644
index 000..25cd181
--- /dev/null
+++ b/src/mapi/glapi/gen/INTEL_performance_query.xml
@@ -0,0 +1,93 @@
+
+
+
+
+
+
+
+  
+
+  
+
+  
+
+
+  
+
+  
+
+
+  
+
+  
+
+
+
+
+
+
+
+  
+
+  
+
+
+
+
+
+
+
+
+
+
+
+  
+
+  
+
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+
+
+
+
+  
+
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+
+
+
diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am
index 6b932e7..409d356 100644
--- a/src/mapi/glapi/gen/Makefile.am
+++ b/src/mapi/glapi/gen/Makefile.am
@@ -171,6 +171,7 @@ API_XML = \
EXT_texture_array.xml \
EXT_texture_integer.xml \
EXT_transform_feedback.xml \
+   INTEL_performance_query.xml \
KHR_debug.xml \
NV_conditional_render.xml \
NV_primitive_restart.xml \
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 9200cd6..71b39a8 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -12808,6 +12808,8 @@
 
 
 
+http://www.w3.org/2001/XInclude"/>
+
 
 
 
diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index 30da5d4..c96502a 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -281,6 +281,14 @@
 #define MAX_VERTEX_STREAMS  4
 /*@}*/
 
+/** For GL_INTEL_performance_query */
+/*@{*/
+#define MAX_PERFQUERY_QUERY_NAME_LENGTH 256
+#define MAX_PERFQUERY_COUNTER_NAME_LENGTH   256
+#define MAX_PERFQUERY_COUNTER_DESC_LENGTH   1024
+#define PERFQUERY_HAVE_GPA_EXTENDED_COUNTERS 0
+/*@}*/
+
 /*
  * Color channel component order
  * 
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index a72284c..6c1c033 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -330,6 +330,7 @@ static const struct extension extension_table[] = {
{ "GL_IBM_rasterpos_clip",  o(dummy_true),  
GLL,1996 },
{ "GL_IBM_texture_mirrored_repeat", o(dummy_true),  
GLL,1998 },
{ "GL_INGR_blend_func_separate",o(EXT_blend_func_separate), 
GLL,1999 },
+   { "GL_INTEL_performance_query", o(INTEL_performance_query), 
  GL | ES2, 2013 },
{ "GL_MESA_pack_invert",o(MESA_pack_invert),
GL, 2002 },
{ "GL_MESA_texture_signed_rgba",o(EXT_texture_snorm),   
GL, 2009 },
{ "GL_MESA_window_pos", o(dummy_true),  
GLL,2000 },
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 6d95790..1897e8d 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -395,6 +395,7 @@ EXTRA_EXT(ARB_viewport_array);
 EXTRA_EXT(ARB_compute_shader);
 EXTRA_EXT(ARB_gpu_shader5);
 EXTRA_EXT2(ARB_transform_feedback3, ARB_gpu_shader5);
+EXTRA_EXT(INTEL_performance_query);
 
 static const int
 extra_ARB_color_bu

Re: [Mesa-dev] [PATCH 5/6] mesa: Implement INTEL_performance_query.

2014-04-22 Thread Petri Latvala

On 04/19/2014 12:34 AM, Ian Romanick wrote:

On 03/17/2014 01:43 AM, Petri Latvala wrote:
+   if (queryName) {
+  strncpy(queryName, group_obj->Name, queryNameLength);
+  /* No specification given about whether the string needs to be
+   * zero-terminated.  Zero-terminate the string anyway, no way for the
+   * application to know if the buffer was large enough.
+   */
Ugh... there's no way to ask the length of the longest name (like you
can do with program uniforms)?  Bad spec, bad!




There's GL_PERFQUERY_QUERY_NAME_LENGTH_MAX_INTEL etc. for all these 
string getters, so the comment might be a bit misleading. I'll rewrite 
it. The intention is to ensure that the written data is always 
null-terminated, even if the given buffer was too small. strncpy will 
already write the \0 at the end unless the buffer was too small. 
glGetActiveUniform() and pals return the length of the actual data 
written (not to mention explicitly specifying that the string is 
null-terminated), this API does not.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/10] mesa: add new enum MAX_UNIFORM_LOCATIONS

2014-04-09 Thread Petri Latvala

On 04/09/2014 12:56 PM, Tapani Pälli wrote:

diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index 06d0bba..5709d42 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -474,6 +474,7 @@ descriptor=[
[ "MAX_LIST_NESTING", "CONST(MAX_LIST_NESTING), NO_EXTRA" ],
[ "MAX_NAME_STACK_DEPTH", "CONST(MAX_NAME_STACK_DEPTH), NO_EXTRA" ],
[ "MAX_PIXEL_MAP_TABLE", "CONST(MAX_PIXEL_MAP_TABLE), NO_EXTRA" ],
+  [ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), 
NO_EXTRA" ],
[ "NAME_STACK_DEPTH", "CONTEXT_INT(Select.NameStackDepth), NO_EXTRA" ],
[ "PACK_LSB_FIRST", "CONTEXT_BOOL(Pack.LsbFirst), NO_EXTRA" ],
[ "PACK_SWAP_BYTES", "CONTEXT_BOOL(Pack.SwapBytes), NO_EXTRA" ],



Should that NO_EXTRA be extra_ARB_explicit_uniform_location?


--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: Add core support for the GL_INTEL_performance_query extension.

2014-04-03 Thread Petri Latvala
Like AMD_performance_monitor, this extension provides an interface for
applications (and OpenGL-based tools) to access GPU performance
counters. Since the exact performance counters available vary between
vendors and hardware generations, the extension provides an API the
application can use to get the names, types, and minimum/maximum
values of all available counters.

Applications create performance queries based on available query
types, and begin/end measurement collection. Multiple queries can be
measuring simultaneously.

v2: Whitespace changes.
v3: src/mapi/glapi/gen/gl_API.xml: Also expose the functions to GLES2.

Signed-off-by: Petri Latvala 
---
 src/mapi/glapi/gen/INTEL_performance_query.xml |  93 
 src/mapi/glapi/gen/Makefile.am |   1 +
 src/mapi/glapi/gen/gl_API.xml  |   2 +
 src/mesa/main/config.h |   8 +
 src/mesa/main/extensions.c |   1 +
 src/mesa/main/get.c|   1 +
 src/mesa/main/get_hash_params.py   |   6 +
 src/mesa/main/mtypes.h |   1 +
 src/mesa/main/performance_monitor.c| 195 +
 src/mesa/main/performance_monitor.h|  43 +-
 src/mesa/main/tests/dispatch_sanity.cpp|  12 ++
 11 files changed, 362 insertions(+), 1 deletion(-)
 create mode 100644 src/mapi/glapi/gen/INTEL_performance_query.xml

diff --git a/src/mapi/glapi/gen/INTEL_performance_query.xml 
b/src/mapi/glapi/gen/INTEL_performance_query.xml
new file mode 100644
index 000..0f4d687
--- /dev/null
+++ b/src/mapi/glapi/gen/INTEL_performance_query.xml
@@ -0,0 +1,93 @@
+
+
+
+
+
+
+
+  
+
+  
+
+  
+
+
+  
+
+  
+
+
+  
+
+  
+
+
+
+
+
+
+
+  
+
+  
+
+
+
+
+
+
+
+
+
+
+
+  
+
+  
+
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+
+
+
+
+  
+
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+
+
+
diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am
index 7b3c118..1c5b61c 100644
--- a/src/mapi/glapi/gen/Makefile.am
+++ b/src/mapi/glapi/gen/Makefile.am
@@ -147,6 +147,7 @@ API_XML = \
EXT_texture_array.xml \
EXT_texture_integer.xml \
EXT_transform_feedback.xml \
+   INTEL_performance_query.xml \
NV_conditional_render.xml \
NV_primitive_restart.xml \
NV_texture_barrier.xml \
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 9129d57..8fbf700 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -12840,6 +12840,8 @@
 
 
 
+http://www.w3.org/2001/XInclude"/>
+
 
 
 
diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index 30da5d4..c96502a 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -281,6 +281,14 @@
 #define MAX_VERTEX_STREAMS  4
 /*@}*/
 
+/** For GL_INTEL_performance_query */
+/*@{*/
+#define MAX_PERFQUERY_QUERY_NAME_LENGTH 256
+#define MAX_PERFQUERY_COUNTER_NAME_LENGTH   256
+#define MAX_PERFQUERY_COUNTER_DESC_LENGTH   1024
+#define PERFQUERY_HAVE_GPA_EXTENDED_COUNTERS 0
+/*@}*/
+
 /*
  * Color channel component order
  * 
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index a72284c..e8f125d 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -330,6 +330,7 @@ static const struct extension extension_table[] = {
{ "GL_IBM_rasterpos_clip",  o(dummy_true),  
GLL,1996 },
{ "GL_IBM_texture_mirrored_repeat", o(dummy_true),  
GLL,1998 },
{ "GL_INGR_blend_func_separate",o(EXT_blend_func_separate), 
GLL,1999 },
+   { "GL_INTEL_performance_query", o(INTEL_performance_query), 
GL | ES2,   2013 },
{ "GL_MESA_pack_invert",o(MESA_pack_invert),
GL, 2002 },
{ "GL_MESA_texture_signed_rgba",o(EXT_texture_snorm),   
GL, 2009 },
{ "GL_MESA_window_pos", o(dummy_true),  
GLL,2000 },
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 88cf202..8c4ac08 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -394,6 +394,7 @@ EXTRA_EXT(ARB_viewport_array);
 EXTRA_EXT(ARB_compute_shader);
 EXTRA_EXT(ARB_gpu_shader5);
 EXTRA_EXT2(ARB_transform_feedback3, ARB_gpu_shader5);
+EXTRA_EXT(INTEL_performance_query);
 
 static const int
 extra_ARB_color_buffer_float_or_glcore[] = {
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index 674d003..4c0838e 100644

[Mesa-dev] [PATCH 6/6] i965: Enable INTEL_performance_query for Gen5+.

2014-03-17 Thread Petri Latvala
Signed-off-by: Petri Latvala 
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 2a68758..5dbd1e6 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -308,8 +308,10 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_stencil_texturing = true;
}
 
-   if (brw->gen == 5 || can_write_oacontrol(brw))
+   if (brw->gen == 5 || can_write_oacontrol(brw)) {
   ctx->Extensions.AMD_performance_monitor = true;
+  ctx->Extensions.INTEL_performance_query = true;
+   }
 
if (ctx->API == API_OPENGL_CORE)
   ctx->Extensions.ARB_base_instance = true;
-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] mesa: Implement INTEL_performance_query.

2014-03-17 Thread Petri Latvala
Using the existing driver hooks made for AMD_performance_monitor, implement
INTEL_performance_query functions.

v2: Whitespace changes.

Signed-off-by: Petri Latvala 
---
 src/mesa/main/performance_monitor.c | 472 
 1 file changed, 431 insertions(+), 41 deletions(-)

diff --git a/src/mesa/main/performance_monitor.c 
b/src/mesa/main/performance_monitor.c
index dd22fef..bf58d45 100644
--- a/src/mesa/main/performance_monitor.c
+++ b/src/mesa/main/performance_monitor.c
@@ -137,6 +137,46 @@ get_counter(const struct gl_perf_monitor_group *group_obj, 
GLuint id)
return &group_obj->Counters[id];
 }
 
+/* For INTEL_performance_query, query id 0 is reserved to be invalid. We use
+ * index to Groups array + 1 as the query id. Same applies to counter id.
+ */
+static inline GLuint
+queryid_to_index(GLuint queryid)
+{
+   return queryid - 1;
+}
+
+static inline GLuint
+index_to_queryid(GLuint index)
+{
+   return index + 1;
+}
+
+static inline bool
+queryid_valid(const struct gl_context *ctx, GLuint queryid)
+{
+   return get_group(ctx, queryid_to_index(queryid)) != NULL;
+}
+
+static inline GLuint
+counterid_to_index(GLuint counterid)
+{
+   return counterid - 1;
+}
+
+static inline GLuint
+index_to_counterid(GLuint index)
+{
+   return index + 1;
+}
+
+static inline bool
+counterid_valid(const struct gl_perf_monitor_group *group_obj,
+GLuint counterid)
+{
+   return get_counter(group_obj, counterid_to_index(counterid)) != NULL;
+}
+
 /*/
 
 void GLAPIENTRY
@@ -645,6 +685,8 @@ _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
 {
GET_CURRENT_CONTEXT(ctx);
 
+   unsigned numGroups;
+
/* "If queryId pointer is equal to 0, INVALID_VALUE error is generated."
 */
if (!queryId) {
@@ -653,13 +695,19 @@ _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
   return;
}
 
+   numGroups = ctx->PerfMonitor.NumGroups;
+
/* "If the given hardware platform doesn't support any performance queries,
 * then the value of 0 is returned and INVALID_OPERATION error is raised."
 */
+   if (numGroups == 0) {
+  *queryId = 0;
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glGetFirstPerfQueryIdINTEL(no queries supported)");
+  return;
+   }
 
-   *queryId = 0;
-   _mesa_error(ctx, GL_INVALID_OPERATION,
-   "glGetFirstPerfQueryIdINTEL(no queries supported)");
+   *queryId = index_to_queryid(0);
 }
 
 extern void GLAPIENTRY
@@ -678,14 +726,25 @@ _mesa_GetNextPerfQueryIdINTEL(GLuint queryId, GLuint 
*nextQueryId)
 
/* "If the specified performance
 * query identifier is invalid then INVALID_VALUE error is generated."
-*
-* No queries are supported, so all queries are invalid.
-*
-* "Whenever error is generated, the value of 0 is returned."
 */
-   *nextQueryId = 0;
-   _mesa_error(ctx, GL_INVALID_VALUE,
-   "glGetNextPerfQueryIdINTEL(invalid query)");
+   if (!queryid_valid(ctx, queryId)) {
+  /* "Whenever error is generated, the value of 0 is returned." */
+  *nextQueryId = 0;
+  _mesa_error(ctx, GL_INVALID_VALUE,
+  "glGetNextPerfQueryIdINTEL(invalid query)");
+  return;
+   }
+
+   ++queryId;
+
+   /* "If query identified by queryId is the last query available the value of
+* 0 is returned."
+*/
+   if (!queryid_valid(ctx, queryId)) {
+  *nextQueryId = 0;
+   } else {
+  *nextQueryId = queryId;
+   }
 }
 
 extern void GLAPIENTRY
@@ -693,10 +752,32 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint 
*queryId)
 {
GET_CURRENT_CONTEXT(ctx);
 
+   unsigned i;
+
+   /* "If queryName does not reference a valid query name, an INVALID_VALUE
+* error is generated."
+*/
+   if (!queryName) {
+  _mesa_error(ctx, GL_INVALID_VALUE,
+  "glGetPerfQueryIdByNameINTEL(queryName == NULL)");
+  return;
+   }
+
+   /* The specification does not state that this produces an error. */
+   if (!queryId) {
+  return;
+   }
+
+   for (i = 0; i < ctx->PerfMonitor.NumGroups; ++i) {
+  const struct gl_perf_monitor_group *group_obj = get_group(ctx, i);
+  if (strcmp(group_obj->Name, queryName) == 0) {
+ *queryId = index_to_queryid(i);
+ return;
+  }
+   }
+
/* "If queryName does not reference a valid query name, an INVALID_VALUE
 * error is generated."
-*
-* No queries are supported, so all query names are invalid.
 */
 
_mesa_error(ctx, GL_INVALID_VALUE,
@@ -711,15 +792,65 @@ _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
 GLuint *capsMask)
 {
GET_CURRENT_CONTEXT(ctx);
+   unsigned i;
 
-   /* "If queryId does not reference a valid query type, an INVALID_VALUE
-* error is generated."
+   const

[Mesa-dev] [PATCH 3/6] mesa: Add INTEL_performance_query enums to tests/enum_strings.cpp

2014-03-17 Thread Petri Latvala
Signed-off-by: Petri Latvala 
---
 src/mesa/main/tests/enum_strings.cpp | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/mesa/main/tests/enum_strings.cpp 
b/src/mesa/main/tests/enum_strings.cpp
index 3795700..d16eb36 100644
--- a/src/mesa/main/tests/enum_strings.cpp
+++ b/src/mesa/main/tests/enum_strings.cpp
@@ -807,6 +807,9 @@ const struct enum_info everything[] = {
{ 0x83F1, "GL_COMPRESSED_RGBA_S3TC_DXT1_EXT" },
{ 0x83F2, "GL_COMPRESSED_RGBA_S3TC_DXT3_ANGLE" },
{ 0x83F3, "GL_COMPRESSED_RGBA_S3TC_DXT5_ANGLE" },
+   { 0x83F9, "GL_PERFQUERY_DONOT_FLUSH_INTEL" },
+   { 0x83FA, "GL_PERFQUERY_FLUSH_INTEL" },
+   { 0x83FB, "GL_PERFQUERY_WAIT_INTEL" },
{ 0x844D, "GL_NEAREST_CLIPMAP_NEAREST_SGIX" },
{ 0x844E, "GL_NEAREST_CLIPMAP_LINEAR_SGIX" },
{ 0x844F, "GL_LINEAR_CLIPMAP_NEAREST_SGIX" },
@@ -1843,6 +1846,21 @@ const struct enum_info everything[] = {
{ 0x9271, "GL_COMPRESSED_SIGNED_R11_EAC" },
{ 0x9272, "GL_COMPRESSED_RG11_EAC" },
{ 0x9273, "GL_COMPRESSED_SIGNED_RG11_EAC" },
+   { 0x94F0, "GL_PERFQUERY_COUNTER_EVENT_INTEL" },
+   { 0x94F1, "GL_PERFQUERY_COUNTER_DURATION_NORM_INTEL" },
+   { 0x94F2, "GL_PERFQUERY_COUNTER_DURATION_RAW_INTEL" },
+   { 0x94F3, "GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL" },
+   { 0x94F4, "GL_PERFQUERY_COUNTER_RAW_INTEL" },
+   { 0x94F5, "GL_PERFQUERY_COUNTER_TIMESTAMP_INTEL" },
+   { 0x94F8, "GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL" },
+   { 0x94F9, "GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL" },
+   { 0x94FA, "GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL" },
+   { 0x94FB, "GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL" },
+   { 0x94FC, "GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL" },
+   { 0x94FD, "GL_PERFQUERY_QUERY_NAME_LENGTH_MAX_INTEL" },
+   { 0x94FE, "GL_PERFQUERY_COUNTER_NAME_LENGTH_MAX_INTEL" },
+   { 0x94FF, "GL_PERFQUERY_COUNTER_DESC_LENGTH_MAX_INTEL" },
+   { 0x9500, "GL_PERFQUERY_GPA_EXTENDED_COUNTERS_INTEL" },
{ 0x19262, "GL_RASTER_POSITION_UNCLIPPED_IBM" },
{ 0, NULL }
 };
-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/6] v2: Implement INTEL_performance_query

2014-03-17 Thread Petri Latvala
Second revision for patch series that implements the
INTEL_performance_query extension. Changes: Import glext.h instead of
adding definitions to gl.h, fix whitespace changes caused by folding
into the wrong commit.

Petri Latvala (6):
  mesa: update glext.h to version 20140313
  Regenerate gl_mangle.h.
  mesa: Add INTEL_performance_query enums to tests/enum_strings.cpp
  mesa: Add core support for the GL_INTEL_performance_query extension.
  mesa: Implement INTEL_performance_query.
  i965: Enable INTEL_performance_query for Gen5+.

 include/GL/gl_mangle.h | 371 +++-
 include/GL/glext.h |  82 +++-
 src/mapi/glapi/gen/INTEL_performance_query.xml |  93 
 src/mapi/glapi/gen/Makefile.am |   1 +
 src/mapi/glapi/gen/gl_API.xml  |   2 +
 src/mesa/drivers/dri/i965/intel_extensions.c   |   4 +-
 src/mesa/main/config.h |   8 +
 src/mesa/main/extensions.c |   1 +
 src/mesa/main/get.c|   1 +
 src/mesa/main/get_hash_params.py   |   6 +
 src/mesa/main/mtypes.h |   1 +
 src/mesa/main/performance_monitor.c| 585 +
 src/mesa/main/performance_monitor.h|  43 +-
 src/mesa/main/tests/dispatch_sanity.cpp|  12 +
 src/mesa/main/tests/enum_strings.cpp   |  18 +
 15 files changed, 1214 insertions(+), 14 deletions(-)
 create mode 100644 src/mapi/glapi/gen/INTEL_performance_query.xml

-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] mesa: Add core support for the GL_INTEL_performance_query extension.

2014-03-17 Thread Petri Latvala
Like AMD_performance_monitor, this extension provides an interface for
applications (and OpenGL-based tools) to access GPU performance
counters. Since the exact performance counters available vary between
vendors and hardware generations, the extension provides an API the
application can use to get the names, types, and minimum/maximum
values of all available counters.

Applications create performance queries based on available query
types, and begin/end measurement collection. Multiple queries can be
measuring simultaneously.

v2: Whitespace changes.

Signed-off-by: Petri Latvala 
---
 src/mapi/glapi/gen/INTEL_performance_query.xml |  93 
 src/mapi/glapi/gen/Makefile.am |   1 +
 src/mapi/glapi/gen/gl_API.xml  |   2 +
 src/mesa/main/config.h |   8 +
 src/mesa/main/extensions.c |   1 +
 src/mesa/main/get.c|   1 +
 src/mesa/main/get_hash_params.py   |   6 +
 src/mesa/main/mtypes.h |   1 +
 src/mesa/main/performance_monitor.c| 195 +
 src/mesa/main/performance_monitor.h|  43 +-
 src/mesa/main/tests/dispatch_sanity.cpp|  12 ++
 11 files changed, 362 insertions(+), 1 deletion(-)
 create mode 100644 src/mapi/glapi/gen/INTEL_performance_query.xml

diff --git a/src/mapi/glapi/gen/INTEL_performance_query.xml 
b/src/mapi/glapi/gen/INTEL_performance_query.xml
new file mode 100644
index 000..333856f
--- /dev/null
+++ b/src/mapi/glapi/gen/INTEL_performance_query.xml
@@ -0,0 +1,93 @@
+
+
+
+
+
+
+
+  
+
+  
+
+  
+
+
+  
+
+  
+
+
+  
+
+  
+
+
+
+
+
+
+
+  
+
+  
+
+
+
+
+
+
+
+
+
+
+
+  
+
+  
+
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+
+
+
+
+  
+
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+
+
+
diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am
index 7b3c118..1c5b61c 100644
--- a/src/mapi/glapi/gen/Makefile.am
+++ b/src/mapi/glapi/gen/Makefile.am
@@ -147,6 +147,7 @@ API_XML = \
EXT_texture_array.xml \
EXT_texture_integer.xml \
EXT_transform_feedback.xml \
+   INTEL_performance_query.xml \
NV_conditional_render.xml \
NV_primitive_restart.xml \
NV_texture_barrier.xml \
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 9129d57..8fbf700 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -12840,6 +12840,8 @@
 
 
 
+http://www.w3.org/2001/XInclude"/>
+
 
 
 
diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index 30da5d4..c96502a 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -281,6 +281,14 @@
 #define MAX_VERTEX_STREAMS  4
 /*@}*/
 
+/** For GL_INTEL_performance_query */
+/*@{*/
+#define MAX_PERFQUERY_QUERY_NAME_LENGTH 256
+#define MAX_PERFQUERY_COUNTER_NAME_LENGTH   256
+#define MAX_PERFQUERY_COUNTER_DESC_LENGTH   1024
+#define PERFQUERY_HAVE_GPA_EXTENDED_COUNTERS 0
+/*@}*/
+
 /*
  * Color channel component order
  * 
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index a72284c..e8f125d 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -330,6 +330,7 @@ static const struct extension extension_table[] = {
{ "GL_IBM_rasterpos_clip",  o(dummy_true),  
GLL,1996 },
{ "GL_IBM_texture_mirrored_repeat", o(dummy_true),  
GLL,1998 },
{ "GL_INGR_blend_func_separate",o(EXT_blend_func_separate), 
GLL,1999 },
+   { "GL_INTEL_performance_query", o(INTEL_performance_query), 
GL | ES2,   2013 },
{ "GL_MESA_pack_invert",o(MESA_pack_invert),
GL, 2002 },
{ "GL_MESA_texture_signed_rgba",o(EXT_texture_snorm),   
GL, 2009 },
{ "GL_MESA_window_pos", o(dummy_true),  
GLL,2000 },
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 88cf202..8c4ac08 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -394,6 +394,7 @@ EXTRA_EXT(ARB_viewport_array);
 EXTRA_EXT(ARB_compute_shader);
 EXTRA_EXT(ARB_gpu_shader5);
 EXTRA_EXT2(ARB_transform_feedback3, ARB_gpu_shader5);
+EXTRA_EXT(INTEL_performance_query);
 
 static const int
 extra_ARB_color_buffer_float_or_glcore[] = {
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index 674d003..4c0838e 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get

[Mesa-dev] [PATCH 1/6] mesa: update glext.h to version 20140313

2014-03-17 Thread Petri Latvala
---
 include/GL/glext.h | 82 ++
 1 file changed, 76 insertions(+), 6 deletions(-)

diff --git a/include/GL/glext.h b/include/GL/glext.h
index 62bae4c..a626580 100644
--- a/include/GL/glext.h
+++ b/include/GL/glext.h
@@ -6,7 +6,7 @@ extern "C" {
 #endif
 
 /*
-** Copyright (c) 2013 The Khronos Group Inc.
+** Copyright (c) 2013-2014 The Khronos Group Inc.
 **
 ** Permission is hereby granted, free of charge, to any person obtaining a
 ** copy of this software and/or associated documentation files (the
@@ -33,7 +33,7 @@ extern "C" {
 ** used to make the header, and the header can be found at
 **   http://www.opengl.org/registry/
 **
-** Khronos $Revision: 24502 $ on $Date: 2013-12-12 13:14:39 -0800 (Thu, 12 Dec 
2013) $
+** Khronos $Revision: 25853 $ on $Date: 2014-03-13 03:40:45 -0700 (Thu, 13 Mar 
2014) $
 */
 
 #if defined(_WIN32) && !defined(APIENTRY) && !defined(__CYGWIN__) && 
!defined(__SCITECH_SNAP__)
@@ -53,7 +53,7 @@ extern "C" {
 #define GLAPI extern
 #endif
 
-#define GL_GLEXT_VERSION 20131212
+#define GL_GLEXT_VERSION 20140313
 
 /* Generated C header for:
  * API: gl
@@ -1485,7 +1485,7 @@ typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTUREPROC) 
(GLenum target, GLenum atta
 typedef void (APIENTRYP PFNGLTEXIMAGE2DMULTISAMPLEPROC) (GLenum target, 
GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, 
GLboolean fixedsamplelocations);
 typedef void (APIENTRYP PFNGLTEXIMAGE3DMULTISAMPLEPROC) (GLenum target, 
GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei 
depth, GLboolean fixedsamplelocations);
 typedef void (APIENTRYP PFNGLGETMULTISAMPLEFVPROC) (GLenum pname, GLuint 
index, GLfloat *val);
-typedef void (APIENTRYP PFNGLSAMPLEMASKIPROC) (GLuint index, GLbitfield mask);
+typedef void (APIENTRYP PFNGLSAMPLEMASKIPROC) (GLuint maskNumber, GLbitfield 
mask);
 #ifdef GL_GLEXT_PROTOTYPES
 GLAPI void APIENTRY glDrawElementsBaseVertex (GLenum mode, GLsizei count, 
GLenum type, const void *indices, GLint basevertex);
 GLAPI void APIENTRY glDrawRangeElementsBaseVertex (GLenum mode, GLuint start, 
GLuint end, GLsizei count, GLenum type, const void *indices, GLint basevertex);
@@ -1505,7 +1505,7 @@ GLAPI void APIENTRY glFramebufferTexture (GLenum target, 
GLenum attachment, GLui
 GLAPI void APIENTRY glTexImage2DMultisample (GLenum target, GLsizei samples, 
GLenum internalformat, GLsizei width, GLsizei height, GLboolean 
fixedsamplelocations);
 GLAPI void APIENTRY glTexImage3DMultisample (GLenum target, GLsizei samples, 
GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean 
fixedsamplelocations);
 GLAPI void APIENTRY glGetMultisamplefv (GLenum pname, GLuint index, GLfloat 
*val);
-GLAPI void APIENTRY glSampleMaski (GLuint index, GLbitfield mask);
+GLAPI void APIENTRY glSampleMaski (GLuint maskNumber, GLbitfield mask);
 #endif
 #endif /* GL_VERSION_3_2 */
 
@@ -7080,6 +7080,10 @@ GLAPI GLuint APIENTRY glCreateShaderProgramEXT (GLenum 
type, const GLchar *strin
 #define GL_SEPARATE_SPECULAR_COLOR_EXT0x81FA
 #endif /* GL_EXT_separate_specular_color */
 
+#ifndef GL_EXT_shader_image_load_formatted
+#define GL_EXT_shader_image_load_formatted 1
+#endif /* GL_EXT_shader_image_load_formatted */
+
 #ifndef GL_EXT_shader_image_load_store
 #define GL_EXT_shader_image_load_store 1
 #define GL_MAX_IMAGE_UNITS_EXT0x8F38
@@ -8126,6 +8130,52 @@ GLAPI void APIENTRY glTexCoordPointervINTEL (GLint size, 
GLenum type, const void
 #endif
 #endif /* GL_INTEL_parallel_arrays */
 
+#ifndef GL_INTEL_performance_query
+#define GL_INTEL_performance_query 1
+#define GL_PERFQUERY_SINGLE_CONTEXT_INTEL 0x
+#define GL_PERFQUERY_GLOBAL_CONTEXT_INTEL 0x0001
+#define GL_PERFQUERY_WAIT_INTEL   0x83FB
+#define GL_PERFQUERY_FLUSH_INTEL  0x83FA
+#define GL_PERFQUERY_DONOT_FLUSH_INTEL0x83F9
+#define GL_PERFQUERY_COUNTER_EVENT_INTEL  0x94F0
+#define GL_PERFQUERY_COUNTER_DURATION_NORM_INTEL 0x94F1
+#define GL_PERFQUERY_COUNTER_DURATION_RAW_INTEL 0x94F2
+#define GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL 0x94F3
+#define GL_PERFQUERY_COUNTER_RAW_INTEL0x94F4
+#define GL_PERFQUERY_COUNTER_TIMESTAMP_INTEL 0x94F5
+#define GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL 0x94F8
+#define GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL 0x94F9
+#define GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL 0x94FA
+#define GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL 0x94FB
+#define GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL 0x94FC
+#define GL_PERFQUERY_QUERY_NAME_LENGTH_MAX_INTEL 0x94FD
+#define GL_PERFQUERY_COUNTER_NAME_LENGTH_MAX_INTEL 0x94FE
+#define GL_PERFQUERY_COUNTER_DESC_LENGTH_MAX_INTEL 0x94FF
+#define GL_PERFQUERY_GPA_EXTENDED_COUNTERS_INTEL 0x9500
+typedef void (APIENTRYP PFNGLBEGINPERFQUERYINTELPROC) (GLuint queryHandle);
+typedef void (APIENTRYP PFNGLCREATEPERFQUERYINTELPROC) (GLuint queryId, GLuint 
*queryHandle);
+typedef void (APIENTRYP PFNGLDELETEPERFQUERYINTELPROC) (GLuint queryHandle);
+typedef void (APIENTRYP PFNGLENDPERFQU

Re: [Mesa-dev] [PATCH 0/6] Implement INTEL_performance_query

2014-03-12 Thread Petri Latvala
One concern I forgot to mention: Interoperability with 
AMD_performance_monitor.


With this patch series as is, after glBeginPerfQueryINTEL, there will be 
a value you can pass to glEndPerfMonitorAMD that won't produce 
GL_INVALID_VALUE as it should. Making sure context's monitor objects are 
accessible only through the extension that created them is something 
that needs to be done, but I'm not sure what would be the best way. 
Comments and hits welcome.



--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] i965: Enable INTEL_performance_query for Gen5+.

2014-03-12 Thread Petri Latvala

On 03/12/2014 03:17 PM, Dragomir Ivanov wrote:

What about other drivers supporting `AMD_performance_monitor`? Are there any?


Other drivers don't support it.


--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] mesa: Add INTEL_performance_query enums to tests/enum_strings.cpp

2014-03-12 Thread Petri Latvala
Signed-off-by: Petri Latvala 
---
 src/mesa/main/tests/enum_strings.cpp | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/mesa/main/tests/enum_strings.cpp 
b/src/mesa/main/tests/enum_strings.cpp
index 3795700..d16eb36 100644
--- a/src/mesa/main/tests/enum_strings.cpp
+++ b/src/mesa/main/tests/enum_strings.cpp
@@ -807,6 +807,9 @@ const struct enum_info everything[] = {
{ 0x83F1, "GL_COMPRESSED_RGBA_S3TC_DXT1_EXT" },
{ 0x83F2, "GL_COMPRESSED_RGBA_S3TC_DXT3_ANGLE" },
{ 0x83F3, "GL_COMPRESSED_RGBA_S3TC_DXT5_ANGLE" },
+   { 0x83F9, "GL_PERFQUERY_DONOT_FLUSH_INTEL" },
+   { 0x83FA, "GL_PERFQUERY_FLUSH_INTEL" },
+   { 0x83FB, "GL_PERFQUERY_WAIT_INTEL" },
{ 0x844D, "GL_NEAREST_CLIPMAP_NEAREST_SGIX" },
{ 0x844E, "GL_NEAREST_CLIPMAP_LINEAR_SGIX" },
{ 0x844F, "GL_LINEAR_CLIPMAP_NEAREST_SGIX" },
@@ -1843,6 +1846,21 @@ const struct enum_info everything[] = {
{ 0x9271, "GL_COMPRESSED_SIGNED_R11_EAC" },
{ 0x9272, "GL_COMPRESSED_RG11_EAC" },
{ 0x9273, "GL_COMPRESSED_SIGNED_RG11_EAC" },
+   { 0x94F0, "GL_PERFQUERY_COUNTER_EVENT_INTEL" },
+   { 0x94F1, "GL_PERFQUERY_COUNTER_DURATION_NORM_INTEL" },
+   { 0x94F2, "GL_PERFQUERY_COUNTER_DURATION_RAW_INTEL" },
+   { 0x94F3, "GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL" },
+   { 0x94F4, "GL_PERFQUERY_COUNTER_RAW_INTEL" },
+   { 0x94F5, "GL_PERFQUERY_COUNTER_TIMESTAMP_INTEL" },
+   { 0x94F8, "GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL" },
+   { 0x94F9, "GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL" },
+   { 0x94FA, "GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL" },
+   { 0x94FB, "GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL" },
+   { 0x94FC, "GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL" },
+   { 0x94FD, "GL_PERFQUERY_QUERY_NAME_LENGTH_MAX_INTEL" },
+   { 0x94FE, "GL_PERFQUERY_COUNTER_NAME_LENGTH_MAX_INTEL" },
+   { 0x94FF, "GL_PERFQUERY_COUNTER_DESC_LENGTH_MAX_INTEL" },
+   { 0x9500, "GL_PERFQUERY_GPA_EXTENDED_COUNTERS_INTEL" },
{ 0x19262, "GL_RASTER_POSITION_UNCLIPPED_IBM" },
{ 0, NULL }
 };
-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] mesa: Implement INTEL_performance_query.

2014-03-12 Thread Petri Latvala
Using the existing driver hooks made for AMD_performance_monitor, implement
INTEL_performance_query functions.

Signed-off-by: Petri Latvala 
---
 src/mesa/main/performance_monitor.c | 476 +---
 1 file changed, 439 insertions(+), 37 deletions(-)

diff --git a/src/mesa/main/performance_monitor.c 
b/src/mesa/main/performance_monitor.c
index 183a895..bf58d45 100644
--- a/src/mesa/main/performance_monitor.c
+++ b/src/mesa/main/performance_monitor.c
@@ -137,6 +137,46 @@ get_counter(const struct gl_perf_monitor_group *group_obj, 
GLuint id)
return &group_obj->Counters[id];
 }
 
+/* For INTEL_performance_query, query id 0 is reserved to be invalid. We use
+ * index to Groups array + 1 as the query id. Same applies to counter id.
+ */
+static inline GLuint
+queryid_to_index(GLuint queryid)
+{
+   return queryid - 1;
+}
+
+static inline GLuint
+index_to_queryid(GLuint index)
+{
+   return index + 1;
+}
+
+static inline bool
+queryid_valid(const struct gl_context *ctx, GLuint queryid)
+{
+   return get_group(ctx, queryid_to_index(queryid)) != NULL;
+}
+
+static inline GLuint
+counterid_to_index(GLuint counterid)
+{
+   return counterid - 1;
+}
+
+static inline GLuint
+index_to_counterid(GLuint index)
+{
+   return index + 1;
+}
+
+static inline bool
+counterid_valid(const struct gl_perf_monitor_group *group_obj,
+GLuint counterid)
+{
+   return get_counter(group_obj, counterid_to_index(counterid)) != NULL;
+}
+
 /*/
 
 void GLAPIENTRY
@@ -645,19 +685,29 @@ _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
 {
GET_CURRENT_CONTEXT(ctx);
 
+   unsigned numGroups;
+
/* "If queryId pointer is equal to 0, INVALID_VALUE error is generated."
 */
if (!queryId) {
-  _mesa_error(ctx, GL_INVALID_VALUE, "glGetFirstPerfQueryIdINTEL(queryId 
== NULL)");
+  _mesa_error(ctx, GL_INVALID_VALUE,
+  "glGetFirstPerfQueryIdINTEL(queryId == NULL)");
   return;
}
 
+   numGroups = ctx->PerfMonitor.NumGroups;
+
/* "If the given hardware platform doesn't support any performance queries,
 * then the value of 0 is returned and INVALID_OPERATION error is raised."
 */
+   if (numGroups == 0) {
+  *queryId = 0;
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glGetFirstPerfQueryIdINTEL(no queries supported)");
+  return;
+   }
 
-   *queryId = 0;
-   _mesa_error(ctx, GL_INVALID_OPERATION, "glGetFirstPerfQueryIdINTEL(no 
queries supported)");
+   *queryId = index_to_queryid(0);
 }
 
 extern void GLAPIENTRY
@@ -667,22 +717,34 @@ _mesa_GetNextPerfQueryIdINTEL(GLuint queryId, GLuint 
*nextQueryId)
 
/* "If nextQueryId pointer is equal to 0, an INVALID_VALUE error is
 * generated."
-*
-* "Whenever error is generated, the value of 0 is returned."
 */
if (!nextQueryId) {
-  *nextQueryId = 0;
-  _mesa_error(ctx, GL_INVALID_VALUE, 
"glGetNextPerfQueryIdINTEL(nextQueryId == NULL)");
+  _mesa_error(ctx, GL_INVALID_VALUE,
+  "glGetNextPerfQueryIdINTEL(nextQueryId == NULL)");
   return;
}
 
/* "If the specified performance
 * query identifier is invalid then INVALID_VALUE error is generated."
-*
-* No queries are supported, so all queries are invalid.
 */
-   *nextQueryId = 0;
-   _mesa_error(ctx, GL_INVALID_VALUE, "glGetNextPerfQueryIdINTEL(invalid 
query)");
+   if (!queryid_valid(ctx, queryId)) {
+  /* "Whenever error is generated, the value of 0 is returned." */
+  *nextQueryId = 0;
+  _mesa_error(ctx, GL_INVALID_VALUE,
+  "glGetNextPerfQueryIdINTEL(invalid query)");
+  return;
+   }
+
+   ++queryId;
+
+   /* "If query identified by queryId is the last query available the value of
+* 0 is returned."
+*/
+   if (!queryid_valid(ctx, queryId)) {
+  *nextQueryId = 0;
+   } else {
+  *nextQueryId = queryId;
+   }
 }
 
 extern void GLAPIENTRY
@@ -690,13 +752,36 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint 
*queryId)
 {
GET_CURRENT_CONTEXT(ctx);
 
+   unsigned i;
+
+   /* "If queryName does not reference a valid query name, an INVALID_VALUE
+* error is generated."
+*/
+   if (!queryName) {
+  _mesa_error(ctx, GL_INVALID_VALUE,
+  "glGetPerfQueryIdByNameINTEL(queryName == NULL)");
+  return;
+   }
+
+   /* The specification does not state that this produces an error. */
+   if (!queryId) {
+  return;
+   }
+
+   for (i = 0; i < ctx->PerfMonitor.NumGroups; ++i) {
+  const struct gl_perf_monitor_group *group_obj = get_group(ctx, i);
+  if (strcmp(group_obj->Name, queryName) == 0) {
+ *queryId = index_to_queryid(i);
+ return;
+  }
+   }
+
/* "

[Mesa-dev] [PATCH 6/6] i965: Enable INTEL_performance_query for Gen5+.

2014-03-12 Thread Petri Latvala
Signed-off-by: Petri Latvala 
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 5094c2b..78bf5b4 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -307,8 +307,10 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_stencil_texturing = true;
}
 
-   if (brw->gen == 5 || can_write_oacontrol(brw))
+   if (brw->gen == 5 || can_write_oacontrol(brw)) {
   ctx->Extensions.AMD_performance_monitor = true;
+  ctx->Extensions.INTEL_performance_query = true;
+   }
 
if (ctx->API == API_OPENGL_CORE)
   ctx->Extensions.ARB_base_instance = true;
-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/6] Implement INTEL_performance_query

2014-03-12 Thread Petri Latvala
This patch series implements the INTEL_performance_query extension.
With driver code for AMD_performance_monitor already in place,
implementing this extension was fairly straightforward.

Planned future improvements: Proper semantic types for counters and
data normalization (currently all counters are exposed as "RAW"
without per-second maximum). Pseudo-counters for things like "time
spent in shader compilation" or "forced recompilation event count".

Piglit tests for this extension have been sent to piglit mailing
list. All tests pass.

Petri Latvala (6):
  mesa: Add GL_INTEL_performance_query definitions
  Regenerate gl_mangle.h.
  mesa: Add INTEL_performance_query enums to tests/enum_strings.cpp
  mesa: Add core support for the GL_INTEL_performance_query extension.
  mesa: Implement INTEL_performance_query.
  i965: Enable INTEL_performance_query for Gen5+.

 include/GL/gl.h|  45 ++
 include/GL/gl_mangle.h | 371 +++-
 src/mapi/glapi/gen/INTEL_performance_query.xml |  93 
 src/mapi/glapi/gen/Makefile.am |   1 +
 src/mapi/glapi/gen/gl_API.xml  |   2 +
 src/mesa/drivers/dri/i965/intel_extensions.c   |   4 +-
 src/mesa/main/config.h |   8 +
 src/mesa/main/extensions.c |   1 +
 src/mesa/main/get.c|   1 +
 src/mesa/main/get_hash_params.py   |   6 +
 src/mesa/main/mtypes.h |   1 +
 src/mesa/main/performance_monitor.c| 585 +
 src/mesa/main/performance_monitor.h|  43 +-
 src/mesa/main/tests/dispatch_sanity.cpp|  12 +
 src/mesa/main/tests/enum_strings.cpp   |  18 +
 15 files changed, 1183 insertions(+), 8 deletions(-)
 create mode 100644 src/mapi/glapi/gen/INTEL_performance_query.xml

-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] mesa: Add GL_INTEL_performance_query definitions

2014-03-12 Thread Petri Latvala
Until Khronos adds the definitions to glext.h, add definitions for this
extension to gl.h.

Signed-off-by: Petri Latvala 
---
 include/GL/gl.h | 45 +
 1 file changed, 45 insertions(+)

diff --git a/include/GL/gl.h b/include/GL/gl.h
index 4e2932d..e2caf73 100644
--- a/include/GL/gl.h
+++ b/include/GL/gl.h
@@ -2104,6 +2104,51 @@ typedef void (APIENTRYP 
PFNGLEGLIMAGETARGETTEXTURE2DOESPROC) (GLenum target, GLe
 typedef void (APIENTRYP PFNGLEGLIMAGETARGETRENDERBUFFERSTORAGEOESPROC) (GLenum 
target, GLeglImageOES image);
 #endif
 
+#ifndef GL_INTEL_performance_query
+#define GL_INTEL_performance_query 1
+#define GL_PERFQUERY_SINGLE_CONTEXT_INTEL 0x
+#define GL_PERFQUERY_GLOBAL_CONTEXT_INTEL 0x0001
+#define GL_PERFQUERY_WAIT_INTEL   0x83FB
+#define GL_PERFQUERY_FLUSH_INTEL  0x83FA
+#define GL_PERFQUERY_DONOT_FLUSH_INTEL0x83F9
+#define GL_PERFQUERY_COUNTER_EVENT_INTEL  0x94F0
+#define GL_PERFQUERY_COUNTER_DURATION_NORM_INTEL 0x94F1
+#define GL_PERFQUERY_COUNTER_DURATION_RAW_INTEL 0x94F2
+#define GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL 0x94F3
+#define GL_PERFQUERY_COUNTER_RAW_INTEL0x94F4
+#define GL_PERFQUERY_COUNTER_TIMESTAMP_INTEL 0x94F5
+#define GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL 0x94F8
+#define GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL 0x94F9
+#define GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL 0x94FA
+#define GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL 0x94FB
+#define GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL 0x94FC
+#define GL_PERFQUERY_QUERY_NAME_LENGTH_MAX_INTEL 0x94FD
+#define GL_PERFQUERY_COUNTER_NAME_LENGTH_MAX_INTEL 0x94FE
+#define GL_PERFQUERY_COUNTER_DESC_LENGTH_MAX_INTEL 0x94FF
+#define GL_PERFQUERY_GPA_EXTENDED_COUNTERS_INTEL 0x9500
+typedef void (GLAPIENTRYP PFNGLBEGINPERFQUERYINTELPROC) (GLuint queryHandle);
+typedef void (GLAPIENTRYP PFNGLCREATEPERFQUERYINTELPROC) (GLuint queryId, 
GLuint *queryHandle);
+typedef void (GLAPIENTRYP PFNGLDELETEPERFQUERYINTELPROC) (GLuint queryHandle);
+typedef void (GLAPIENTRYP PFNGLENDPERFQUERYINTELPROC) (GLuint queryHandle);
+typedef void (GLAPIENTRYP PFNGLGETFIRSTPERFQUERYIDINTELPROC) (GLuint *queryId);
+typedef void (GLAPIENTRYP PFNGLGETNEXTPERFQUERYIDINTELPROC) (GLuint queryId, 
GLuint *nextQueryId);
+typedef void (GLAPIENTRYP PFNGLGETPERFCOUNTERINFOINTELPROC) (GLuint queryId, 
GLuint counterId, GLuint counterNameLength, GLchar *counterName, GLuint 
counterDescLength, GLchar *counterDesc, GLuint *counterOffset, GLuint 
*counterDataSize, GLuint *counterTypeEnum, GLuint *counterDataTypeEnum, 
GLuint64 *rawCounterMaxValue);
+typedef void (GLAPIENTRYP PFNGLGETPERFQUERYDATAINTELPROC) (GLuint queryHandle, 
GLuint flags, GLsizei dataSize, GLvoid *data, GLuint *bytesWritten);
+typedef void (GLAPIENTRYP PFNGLGETPERFQUERYIDBYNAMEINTELPROC) (GLchar 
*queryName, GLuint *queryId);
+typedef void (GLAPIENTRYP PFNGLGETPERFQUERYINFOINTELPROC) (GLuint queryId, 
GLuint queryNameLength, GLchar *queryName, GLuint *dataSize, GLuint 
*noCounters, GLuint *noInstances, GLuint *capsMask);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void GLAPIENTRY glBeginPerfQueryINTEL (GLuint queryHandle);
+GLAPI void GLAPIENTRY glCreatePerfQueryINTEL (GLuint queryId, GLuint 
*queryHandle);
+GLAPI void GLAPIENTRY glDeletePerfQueryINTEL (GLuint queryHandle);
+GLAPI void GLAPIENTRY glEndPerfQueryINTEL (GLuint queryHandle);
+GLAPI void GLAPIENTRY glGetFirstPerfQueryIdINTEL (GLuint *queryId);
+GLAPI void GLAPIENTRY glGetNextPerfQueryIdINTEL (GLuint queryId, GLuint 
*nextQueryId);
+GLAPI void GLAPIENTRY glGetPerfCounterInfoINTEL (GLuint queryId, GLuint 
counterId, GLuint counterNameLength, GLchar *counterName, GLuint 
counterDescLength, GLchar *counterDesc, GLuint *counterOffset, GLuint 
*counterDataSize, GLuint *counterTypeEnum, GLuint *counterDataTypeEnum, 
GLuint64 *rawCounterMaxValue);
+GLAPI void GLAPIENTRY glGetPerfQueryDataINTEL (GLuint queryHandle, GLuint 
flags, GLsizei dataSize, GLvoid *data, GLuint *bytesWritten);
+GLAPI void GLAPIENTRY glGetPerfQueryIdByNameINTEL (GLchar *queryName, GLuint 
*queryId);
+GLAPI void GLAPIENTRY glGetPerfQueryInfoINTEL (GLuint queryId, GLuint 
queryNameLength, GLchar *queryName, GLuint *dataSize, GLuint *noCounters, 
GLuint *noInstances, GLuint *capsMask);
+#endif
+#endif /* GL_INTEL_performance_query */
 
 /**
  ** NOTE!  If you add new functions to this file, or update
-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] mesa: Add core support for the GL_INTEL_performance_query extension.

2014-03-12 Thread Petri Latvala
Like AMD_performance_monitor, this extension provides an interface for
applications (and OpenGL-based tools) to access GPU performance
counters. Since the exact performance counters available vary between
vendors and hardware generations, the extension provides an API the
application can use to get the names, types, and minimum/maximum
values of all available counters.

Applications create performance queries based on available query
types, and begin/end measurement collection. Multiple queries can be
measuring simultaneously.

Signed-off-by: Petri Latvala 
---
 src/mapi/glapi/gen/INTEL_performance_query.xml |  93 +
 src/mapi/glapi/gen/Makefile.am |   1 +
 src/mapi/glapi/gen/gl_API.xml  |   2 +
 src/mesa/main/config.h |   8 ++
 src/mesa/main/extensions.c |   1 +
 src/mesa/main/get.c|   1 +
 src/mesa/main/get_hash_params.py   |   6 +
 src/mesa/main/mtypes.h |   1 +
 src/mesa/main/performance_monitor.c| 183 +
 src/mesa/main/performance_monitor.h|  43 +-
 src/mesa/main/tests/dispatch_sanity.cpp|  12 ++
 11 files changed, 350 insertions(+), 1 deletion(-)
 create mode 100644 src/mapi/glapi/gen/INTEL_performance_query.xml

diff --git a/src/mapi/glapi/gen/INTEL_performance_query.xml 
b/src/mapi/glapi/gen/INTEL_performance_query.xml
new file mode 100644
index 000..333856f
--- /dev/null
+++ b/src/mapi/glapi/gen/INTEL_performance_query.xml
@@ -0,0 +1,93 @@
+
+
+
+
+
+
+
+  
+
+  
+
+  
+
+
+  
+
+  
+
+
+  
+
+  
+
+
+
+
+
+
+
+  
+
+  
+
+
+
+
+
+
+
+
+
+
+
+  
+
+  
+
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+
+
+
+
+  
+
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+
+
+
diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am
index 7b3c118..1c5b61c 100644
--- a/src/mapi/glapi/gen/Makefile.am
+++ b/src/mapi/glapi/gen/Makefile.am
@@ -147,6 +147,7 @@ API_XML = \
EXT_texture_array.xml \
EXT_texture_integer.xml \
EXT_transform_feedback.xml \
+   INTEL_performance_query.xml \
NV_conditional_render.xml \
NV_primitive_restart.xml \
NV_texture_barrier.xml \
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 9129d57..8fbf700 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -12840,6 +12840,8 @@
 
 
 
+http://www.w3.org/2001/XInclude"/>
+
 
 
 
diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index 30da5d4..c96502a 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -281,6 +281,14 @@
 #define MAX_VERTEX_STREAMS  4
 /*@}*/
 
+/** For GL_INTEL_performance_query */
+/*@{*/
+#define MAX_PERFQUERY_QUERY_NAME_LENGTH 256
+#define MAX_PERFQUERY_COUNTER_NAME_LENGTH   256
+#define MAX_PERFQUERY_COUNTER_DESC_LENGTH   1024
+#define PERFQUERY_HAVE_GPA_EXTENDED_COUNTERS 0
+/*@}*/
+
 /*
  * Color channel component order
  * 
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index a72284c..e8f125d 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -330,6 +330,7 @@ static const struct extension extension_table[] = {
{ "GL_IBM_rasterpos_clip",  o(dummy_true),  
GLL,1996 },
{ "GL_IBM_texture_mirrored_repeat", o(dummy_true),  
GLL,1998 },
{ "GL_INGR_blend_func_separate",o(EXT_blend_func_separate), 
GLL,1999 },
+   { "GL_INTEL_performance_query", o(INTEL_performance_query), 
GL | ES2,   2013 },
{ "GL_MESA_pack_invert",o(MESA_pack_invert),
GL, 2002 },
{ "GL_MESA_texture_signed_rgba",o(EXT_texture_snorm),   
GL, 2009 },
{ "GL_MESA_window_pos", o(dummy_true),  
GLL,2000 },
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index b190851..ba58f02 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -394,6 +394,7 @@ EXTRA_EXT(ARB_viewport_array);
 EXTRA_EXT(ARB_compute_shader);
 EXTRA_EXT(ARB_gpu_shader5);
 EXTRA_EXT2(ARB_transform_feedback3, ARB_gpu_shader5);
+EXTRA_EXT(INTEL_performance_query);
 
 static const int
 extra_ARB_color_buffer_float_or_glcore[] = {
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index 674d003..4c0838e 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -311,6 +3

Re: [Mesa-dev] [PATCH 1/4] mesa/main: add ARB_clear_texture entrypoints

2014-03-06 Thread Petri Latvala

On 03/07/2014 08:41 AM, Ilia Mirkin wrote:

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index a72284c..77da1eb 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -86,6 +86,7 @@ static const struct extension extension_table[] = {
 { "GL_ARB_buffer_storage",  o(ARB_buffer_storage), 
 GL, 2013 },
 { "GL_ARB_clear_buffer_object", o(dummy_true), 
 GL, 2012 },
 { "GL_ARB_color_buffer_float",  o(ARB_color_buffer_float), 
 GL, 2004 },
+   { "GL_ARB_clear_texture",   o(ARB_clear_texture),   
GL, 2013 },
 { "GL_ARB_compute_shader",  o(ARB_compute_shader), 
 GL, 2012 },
 { "GL_ARB_copy_buffer", o(dummy_true), 
 GL, 2008 },
 { "GL_ARB_conservative_depth",  o(ARB_conservative_depth), 
 GL, 2011 },



Move that line one line up to get the list alphabetically sorted.


--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: Assert array index on access to vec4_visitor's arrays.

2014-02-27 Thread Petri Latvala
v2: vec4_visitor::pack_uniform_registers(): Use correct comparison in the
  assert, this->uniforms is already adjusted. Compare the actual value used to
  index uniform_size and uniform_vector_size instead.

Signed-off-by: Petri Latvala 
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 ++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 ++
 2 files changed, 8 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index fcb7365..c30afae 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -422,6 +422,7 @@ vec4_visitor::pack_uniform_registers()
 * push constants.
 */
for (int src = 0; src < uniforms; src++) {
+  assert(src < uniform_array_size);
   int size = this->uniform_vector_size[src];
 
   if (!uniform_used[src]) {
@@ -1404,6 +1405,7 @@ vec4_visitor::setup_uniforms(int reg)
 * matter what, or the GPU would hang.
 */
if (brw->gen < 6 && this->uniforms == 0) {
+  assert(this->uniforms < this->uniform_array_size);
   this->uniform_vector_size[this->uniforms] = 1;
 
   stage_prog_data->param =
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index cc92a8a..88ee929 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -664,6 +664,7 @@ vec4_visitor::setup_uniform_values(ir_variable *ir)
storage->type->matrix_columns);
 
   for (unsigned s = 0; s < vector_count; s++) {
+ assert(uniforms < uniform_array_size);
  uniform_vector_size[uniforms] = storage->type->vector_elements;
 
  int i;
@@ -687,6 +688,7 @@ vec4_visitor::setup_uniform_clipplane_values()
gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
 
for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
+  assert(this->uniforms < uniform_array_size);
   this->uniform_vector_size[this->uniforms] = 4;
   this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
   this->userplane[i].type = BRW_REGISTER_TYPE_F;
@@ -717,6 +719,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
(gl_state_index *)slots[i].tokens);
   float *values = &this->prog->Parameters->ParameterValues[index][0].f;
 
+  assert(this->uniforms < uniform_array_size);
   this->uniform_vector_size[this->uniforms] = 0;
   /* Add each of the unique swizzled channels of the element.
* This will end up matching the size of the glsl_type of this field.
@@ -727,6 +730,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
 last_swiz = swiz;
 
 stage_prog_data->param[this->uniforms * 4 + j] = &values[swiz];
+assert(this->uniforms < uniform_array_size);
 if (swiz <= last_swiz)
this->uniform_vector_size[this->uniforms]++;
   }
@@ -976,6 +980,7 @@ vec4_visitor::visit(ir_variable *ir)
   /* Track how big the whole uniform variable is, in case we need to put a
* copy of its data into pull constants for array access.
*/
+  assert(this->uniforms < uniform_array_size);
   this->uniform_size[this->uniforms] = type_size(ir->type);
 
   if (!strncmp(ir->name, "gl_", 3)) {
@@ -3302,6 +3307,7 @@ 
vec4_visitor::move_uniform_array_access_to_pull_constants()
 
pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4;
 
+   assert(uniform < uniform_array_size);
for (int j = 0; j < uniform_size[uniform] * 4; j++) {
   stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
   = values[j];
-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2] v5: Fix array overrun with too many uniforms

2014-02-27 Thread Petri Latvala
Fixing https://bugs.freedesktop.org/show_bug.cgi?id=71254 once again.

Rebased for current master, plumbing-class changes to patch 1/2. Fixed
one of the asserts in patch 2/2.

Full piglit run shows no regressions.

Petri Latvala (2):
  i965: Allocate vec4_visitor's uniform_size and uniform_vector_size
arrays dynamically.
  i965: Assert array index on access to vec4_visitor's arrays.

 src/mesa/drivers/dri/i965/brw_vec4.cpp |  2 ++
 src/mesa/drivers/dri/i965/brw_vec4.h   |  5 +++--
 src/mesa/drivers/dri/i965/brw_vec4_gs.c|  5 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 17 +
 src/mesa/drivers/dri/i965/brw_vs.c |  8 
 5 files changed, 35 insertions(+), 2 deletions(-)

-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2014-02-27 Thread Petri Latvala
v2: Don't add function parameters, pass the required size in
prog_data->nr_params.

v3:
- Use the name uniform_array_size instead of uniform_param_count.
- Round up when dividing param_count by 4.
- Use MAX2() instead of taking the maximum by hand.
- Don't crash if prog_data passed to vec4_visitor constructor is NULL

v4: Rebase for current master

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=71254

Signed-off-by: Petri Latvala 
---
 src/mesa/drivers/dri/i965/brw_vec4.h   |  5 +++--
 src/mesa/drivers/dri/i965/brw_vec4_gs.c|  5 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 +++
 src/mesa/drivers/dri/i965/brw_vs.c |  8 
 4 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index fb5c0a6..d97c103 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -387,8 +387,9 @@ public:
 */
dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
-   int uniform_size[MAX_UNIFORMS];
-   int uniform_vector_size[MAX_UNIFORMS];
+   int *uniform_size;
+   int *uniform_vector_size;
+   int uniform_array_size; /*< Size of uniform_[vector_]size arrays */
int uniforms;
 
src_reg shader_start_time;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
index 85fb979..8a9625b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
@@ -68,6 +68,11 @@ do_gs_prog(struct brw_context *brw,
   rzalloc_array(NULL, const float *, param_count);
c.prog_data.base.base.pull_param =
   rzalloc_array(NULL, const float *, param_count);
+   /* Setting nr_params here NOT to the size of the param and pull_param
+* arrays, but to the number of uniform components vec4_visitor
+* needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
+*/
+   c.prog_data.base.base.nr_params = ALIGN(param_count, 4) / 4 + 
gs->num_samplers;
 
if (gp->program.OutputType == GL_POINTS) {
   /* When the output type is points, the geometry shader may output data
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index aedab93..cc92a8a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -3401,6 +3401,17 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
 
this->uniforms = 0;
+
+   /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
+* at least one. See setup_uniforms() in brw_vec4.cpp.
+*/
+   this->uniform_array_size = 1;
+   if (prog_data) {
+  this->uniform_array_size = MAX2(stage_prog_data->nr_params, 1);
+   }
+
+   this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
+   this->uniform_vector_size = rzalloc_array(mem_ctx, int, 
this->uniform_array_size);
 }
 
 vec4_visitor::~vec4_visitor()
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
b/src/mesa/drivers/dri/i965/brw_vs.c
index d3dbc8e..ef8dcf1 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -235,6 +235,14 @@ do_vs_prog(struct brw_context *brw,
 
stage_prog_data->param = rzalloc_array(NULL, const float *, param_count);
stage_prog_data->pull_param = rzalloc_array(NULL, const float *, 
param_count);
+   /* Setting nr_params here NOT to the size of the param and pull_param
+* arrays, but to the number of uniform components vec4_visitor
+* needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
+*/
+   stage_prog_data->nr_params = ALIGN(param_count, 4) / 4;
+   if (vs) {
+  stage_prog_data->nr_params += vs->num_samplers;
+   }
 
GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
prog_data.inputs_read = vp->program.Base.InputsRead;
-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Assert array index on access to vec4_visitor's arrays.

2014-02-27 Thread Petri Latvala

On 02/27/2014 12:30 AM, Ian Romanick wrote:
Did you piglit run this?  I applied it on current master, and about 
2,700 piglit tests assert.




Yes, at the time there were no piglit errors. Confirmed, the patch 
series breaks current master.


Not to mention it doesn't apply cleanly anymore anyway, new series required.




diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp

index fb57707..4dc0482 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -411,6 +411,7 @@ vec4_visitor::pack_uniform_registers()
 /* Now, figure out a packing of the live uniform vectors into our
  * push constants.
  */
+   assert(uniforms < uniform_array_size);


All of the tests that I looked at (about a dozen) hit this assertion.




From tests/quick, with some grepping and processing on piglit result 
json directly, all those assert failures are this one.


*quick debugging build later*

And all those asserts hit because uniforms == uniform_array_size.


Fix is on the way.


--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] glx: add missing null check in SendMakeCurrentRequest

2014-02-11 Thread Petri Latvala

On 02/11/2014 04:07 PM, Juha-Pekka Heikkila wrote:

Signed-off-by: Juha-Pekka Heikkila 
---
  src/glx/indirect_glx.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glx/indirect_glx.c b/src/glx/indirect_glx.c
index 28b8cd0..41048f4 100644
--- a/src/glx/indirect_glx.c
+++ b/src/glx/indirect_glx.c
@@ -84,7 +84,7 @@ SendMakeCurrentRequest(Display * dpy, CARD8 opcode,
 * not the SGI extension.
 */
  
-  if ((priv->majorVersion > 1) || (priv->minorVersion >= 3)) {

+  if (priv && (priv->majorVersion > 1) || (priv->minorVersion >= 3)) {
   xGLXMakeContextCurrentReq *req;
  
   GetReq(GLXMakeContextCurrent, req);


If priv is NULL, this code still accesses priv->minorVersion. You need

  if (priv && ((priv->majorVersion > 1) || (priv->minorVersion >= 3)))



--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2] v4: Fix array overrun with too many uniforms

2014-02-05 Thread Petri Latvala

On 12/30/2013 11:44 AM, Petri Latvala wrote:

Fourth version of patch series.

- Fixed vec4_register_coalesce unit test. That test passes NULL for
   prog_data, which Mesa proper doesn't do.
- Renamed uniform_param_count to uniform_array_size.
- Used ALIGN() to round up when dividing buffer size by 4.
- Used MAX2() instead of taking maximum manually.


Petri Latvala (2):
   i965: Allocate vec4_visitor's uniform_size and uniform_vector_size
 arrays dynamically.
   i965: Assert array index on access to vec4_visitor's arrays.

  src/mesa/drivers/dri/i965/brw_vec4.cpp |  2 ++
  src/mesa/drivers/dri/i965/brw_vec4.h   |  5 +++--
  src/mesa/drivers/dri/i965/brw_vec4_gs.c|  5 +
  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 17 +
  src/mesa/drivers/dri/i965/brw_vs.c |  8 
  5 files changed, 35 insertions(+), 2 deletions(-)



Gentle prod and ping.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2] v4: Fix array overrun with too many uniforms

2013-12-30 Thread Petri Latvala
Fourth version of patch series.

- Fixed vec4_register_coalesce unit test. That test passes NULL for
  prog_data, which Mesa proper doesn't do.
- Renamed uniform_param_count to uniform_array_size.
- Used ALIGN() to round up when dividing buffer size by 4.
- Used MAX2() instead of taking maximum manually.


Petri Latvala (2):
  i965: Allocate vec4_visitor's uniform_size and uniform_vector_size
arrays dynamically.
  i965: Assert array index on access to vec4_visitor's arrays.

 src/mesa/drivers/dri/i965/brw_vec4.cpp |  2 ++
 src/mesa/drivers/dri/i965/brw_vec4.h   |  5 +++--
 src/mesa/drivers/dri/i965/brw_vec4_gs.c|  5 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 17 +
 src/mesa/drivers/dri/i965/brw_vs.c |  8 
 5 files changed, 35 insertions(+), 2 deletions(-)

-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: Assert array index on access to vec4_visitor's arrays.

2013-12-30 Thread Petri Latvala
Signed-off-by: Petri Latvala 
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 ++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 ++
 2 files changed, 8 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index fb57707..4dc0482 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -411,6 +411,7 @@ vec4_visitor::pack_uniform_registers()
/* Now, figure out a packing of the live uniform vectors into our
 * push constants.
 */
+   assert(uniforms < uniform_array_size);
for (int src = 0; src < uniforms; src++) {
   int size = this->uniform_vector_size[src];
 
@@ -1393,6 +1394,7 @@ vec4_visitor::setup_uniforms(int reg)
 * matter what, or the GPU would hang.
 */
if (brw->gen < 6 && this->uniforms == 0) {
+  assert(this->uniforms < this->uniform_array_size);
   this->uniform_vector_size[this->uniforms] = 1;
 
   prog_data->param = reralloc(NULL, prog_data->param, const float *, 4);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index a93fdc5..8d4c557 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -662,6 +662,7 @@ vec4_visitor::setup_uniform_values(ir_variable *ir)
storage->type->matrix_columns);
 
   for (unsigned s = 0; s < vector_count; s++) {
+ assert(uniforms < uniform_array_size);
  uniform_vector_size[uniforms] = storage->type->vector_elements;
 
  int i;
@@ -685,6 +686,7 @@ vec4_visitor::setup_uniform_clipplane_values()
gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
 
for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
+  assert(this->uniforms < uniform_array_size);
   this->uniform_vector_size[this->uniforms] = 4;
   this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
   this->userplane[i].type = BRW_REGISTER_TYPE_F;
@@ -715,6 +717,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
(gl_state_index *)slots[i].tokens);
   float *values = &this->prog->Parameters->ParameterValues[index][0].f;
 
+  assert(this->uniforms < uniform_array_size);
   this->uniform_vector_size[this->uniforms] = 0;
   /* Add each of the unique swizzled channels of the element.
* This will end up matching the size of the glsl_type of this field.
@@ -725,6 +728,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
 last_swiz = swiz;
 
 prog_data->param[this->uniforms * 4 + j] = &values[swiz];
+assert(this->uniforms < uniform_array_size);
 if (swiz <= last_swiz)
this->uniform_vector_size[this->uniforms]++;
   }
@@ -983,6 +987,7 @@ vec4_visitor::visit(ir_variable *ir)
   /* Track how big the whole uniform variable is, in case we need to put a
* copy of its data into pull constants for array access.
*/
+  assert(this->uniforms < uniform_array_size);
   this->uniform_size[this->uniforms] = type_size(ir->type);
 
   if (!strncmp(ir->name, "gl_", 3)) {
@@ -3228,6 +3233,7 @@ 
vec4_visitor::move_uniform_array_access_to_pull_constants()
 
pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
 
+   assert(uniform < uniform_array_size);
for (int j = 0; j < uniform_size[uniform] * 4; j++) {
   prog_data->pull_param[prog_data->nr_pull_params++]
   = values[j];
-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-12-30 Thread Petri Latvala
v2: Don't add function parameters, pass the required size in
prog_data->nr_params.

v3:
- Use the name uniform_array_size instead of uniform_param_count.
- Round up when dividing param_count by 4.
- Use MAX2() instead of taking the maximum by hand.
- Don't crash if prog_data passed to vec4_visitor constructor is NULL

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=71254

Signed-off-by: Petri Latvala 
---
 src/mesa/drivers/dri/i965/brw_vec4.h   |  5 +++--
 src/mesa/drivers/dri/i965/brw_vec4_gs.c|  5 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 +++
 src/mesa/drivers/dri/i965/brw_vs.c |  8 
 4 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index d4029d8..798d8bd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -325,8 +325,9 @@ public:
 */
dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
-   int uniform_size[MAX_UNIFORMS];
-   int uniform_vector_size[MAX_UNIFORMS];
+   int *uniform_size;
+   int *uniform_vector_size;
+   int uniform_array_size; /*< Size of uniform_[vector_]size arrays */
int uniforms;
 
src_reg shader_start_time;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
index 018b0b6..c749eb1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
@@ -64,6 +64,11 @@ do_gs_prog(struct brw_context *brw,
 
c.prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
c.prog_data.base.pull_param = rzalloc_array(NULL, const float *, 
param_count);
+   /* Setting nr_params here NOT to the size of the param and pull_param
+* arrays, but to the number of uniform components vec4_visitor
+* needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
+*/
+   c.prog_data.base.nr_params = ALIGN(param_count, 4) / 4 + gs->num_samplers;
 
if (gp->program.OutputType == GL_POINTS) {
   /* When the output type is points, the geometry shader may output data
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 3b8cef6..a93fdc5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -3321,6 +3321,17 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
 
this->uniforms = 0;
+
+   /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
+* at least one. See setup_uniforms() in brw_vec4.cpp.
+*/
+   this->uniform_array_size = 1;
+   if (prog_data) {
+  this->uniform_array_size = MAX2(prog_data->nr_params, 1);
+   }
+
+   this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
+   this->uniform_vector_size = rzalloc_array(mem_ctx, int, 
this->uniform_array_size);
 }
 
 vec4_visitor::~vec4_visitor()
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
b/src/mesa/drivers/dri/i965/brw_vs.c
index 5d50c3c..609a575 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -242,6 +242,14 @@ do_vs_prog(struct brw_context *brw,
 
prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
prog_data.base.pull_param = rzalloc_array(NULL, const float *, param_count);
+   /* Setting nr_params here NOT to the size of the param and pull_param
+* arrays, but to the number of uniform components vec4_visitor
+* needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
+*/
+   prog_data.base.nr_params = ALIGN(param_count, 4) / 4;
+   if (vs) {
+  prog_data.base.nr_params += vs->num_samplers;
+   }
 
GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
prog_data.inputs_read = vp->program.Base.InputsRead;
-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-12-30 Thread Petri Latvala

On 12/20/2013 08:54 PM, Ian Romanick wrote:

This patch breaks the test_vec4_register_coalesce unit test.  Did you
run 'make check'?




I thought I did, but turns out I didn't. Just ran piglit tests.

Fix coming up.


--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-12-30 Thread Petri Latvala

On 12/20/2013 09:26 PM, Kenneth Graunke wrote:

On 11/27/2013 05:28 AM, Petri Latvala wrote:

v2: Don't add function parameters, pass the required size in
prog_data->nr_params.

Signed-off-by: Petri Latvala 
---
  src/mesa/drivers/dri/i965/brw_vec4.h   | 5 +++--
  src/mesa/drivers/dri/i965/brw_vec4_gs.c| 5 +
  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 7 +++
  src/mesa/drivers/dri/i965/brw_vs.c | 8 
  4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 5cec9f9..5f5f5cd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -325,8 +325,9 @@ public:
  */
 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
-   int uniform_size[MAX_UNIFORMS];
-   int uniform_vector_size[MAX_UNIFORMS];
+   int *uniform_size;
+   int *uniform_vector_size;
+   int uniform_param_count; /*< Size of uniform_[vector_]size arrays */

I'm not crazy about this variable name.  Between the params arrays,
uniform_* arrays, nr_params count, and uniforms count...we already have
a lot of distinct things that sound alike.

How about:

int uniform_array_size; /**< Size of uniform_[vector_]size arrays. */

That seems clearer to me.  Especially seeing that the value here is
really the size of the array, which is an overestimate/upper bound on
the number of uniforms, not the actual number of elements in the
uniforms or params arrays.


Sounds good. Changing the name.



 int uniforms;
  
 src_reg shader_start_time;

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
index 018b0b6..7cf9bac 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
@@ -64,6 +64,11 @@ do_gs_prog(struct brw_context *brw,
  
 c.prog_data.base.param = rzalloc_array(NULL, const float *, param_count);

 c.prog_data.base.pull_param = rzalloc_array(NULL, const float *, 
param_count);
+   /* Setting nr_params here NOT to the size of the param and pull_param
+* arrays, but to the number of uniform components vec4_visitor
+* needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
+*/
+   c.prog_data.base.nr_params = param_count / 4 + gs->num_samplers;

Hmm.  You're counting the number of vec4s, but...don't samplers take up
a single entry, since they're just integers?  This seems odd to me.


No, that value is the number of float-size components. param_count, 
calculated above that code, multiplies the number of components by 4 to 
have room elsewhere for scalar params that eat up a vec4. I'm just 
dividing the number back.



You might also consider doing ALIGN(param_count, 4) / 4 so that you
round up rather than truncating on the division.


Ok, changing that. Here and in vs.


I also would really like to keep nr_params in consistent units, i.e.
always uniform float-size components or always vec4s.


I would really like that too, but unfortunately the inconsistency was 
already present.


The best option would be to have another variable for uniform memory 
use, but I didn't want to make too invasive changes. nr_param gets used 
here for uniform float-size counts, and later on for something else. 
This results in some overuse of memory when uniforms are smaller than vec4s.



 if (gp->program.OutputType == GL_POINTS) {
/* When the output type is points, the geometry shader may output data
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index a13eafb..b9226dc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -3253,6 +3253,10 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
   fail_msg(NULL),
   first_non_payload_grf(0),
   need_all_constants_in_pull_buffer(false),
+ /* Initialize uniform_param_count to at least 1 because gen6 VS requires 
at
+  * least one. See setup_uniforms() in brw_vec4.cpp.
+  */

I think you mean "Gen4-5 requires at least one push constant", not "gen6
VS."  At least, that's what setup_uniforms() is doing.


Argh, yes. Fixing that comment.


+ uniform_param_count(prog_data->nr_params ? prog_data->nr_params : 1),

I think this would be clearer as:

MAX2(prog_data->nr_params, 1)


Yes, changing.


--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2] v3: Fix array overrun with too many uniforms

2013-12-17 Thread Petri Latvala

On 11/27/2013 03:28 PM, Petri Latvala wrote:

Third version of this patch series sent in full



Ping? Comments, NAKs, ACKs...?


--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/23] i965: Define common register base class shared between both back-ends.

2013-12-02 Thread Petri Latvala

On 12/02/2013 10:36 PM, Francisco Jerez wrote:

Would you prefer 'this->operator=(reg);'?

I just remembered...  The reason this wouldn't work is that it would
trigger an implicit conversion from 'backend_reg' to 'fs_reg', causing
infinite recursion into the fs_reg constructor.




For the record, this->backend_reg::operator=(reg) would have done what 
you aimed for.


Not saying it kosher either way though, calling assignment operator in a 
constructor is iffy :P. The proper way is selecting the correct base 
class constructor to call.



--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Fw: Taking part in MESA development - Dissertation Project

2013-11-28 Thread Petri Latvala

On 11/28/2013 11:15 PM, Timothy Arceri wrote:

Hi guys,

I received the following submitted as an Issue on my github account.
Maybe someone here has a project they can suggest.


Is NewbieProjects too "newbie" for this?

http://wiki.freedesktop.org/dri/NewbieProjects/



--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: Assert array index on access to vec4_visitor's arrays.

2013-11-27 Thread Petri Latvala
Signed-off-by: Petri Latvala 
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 ++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 ++
 2 files changed, 8 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 73f91a0..0cbdc98 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -410,6 +410,7 @@ vec4_visitor::pack_uniform_registers()
/* Now, figure out a packing of the live uniform vectors into our
 * push constants.
 */
+   assert(uniforms < uniform_param_count);
for (int src = 0; src < uniforms; src++) {
   int size = this->uniform_vector_size[src];
 
@@ -1315,6 +1316,7 @@ vec4_visitor::setup_uniforms(int reg)
 * matter what, or the GPU would hang.
 */
if (brw->gen < 6 && this->uniforms == 0) {
+  assert(this->uniforms < this->uniform_param_count);
   this->uniform_vector_size[this->uniforms] = 1;
 
   prog_data->param = reralloc(NULL, prog_data->param, const float *, 4);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index b9226dc..d01c8d5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -662,6 +662,7 @@ vec4_visitor::setup_uniform_values(ir_variable *ir)
storage->type->matrix_columns);
 
   for (unsigned s = 0; s < vector_count; s++) {
+ assert(uniforms < uniform_param_count);
  uniform_vector_size[uniforms] = storage->type->vector_elements;
 
  int i;
@@ -685,6 +686,7 @@ vec4_visitor::setup_uniform_clipplane_values()
gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
 
for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
+  assert(this->uniforms < uniform_param_count);
   this->uniform_vector_size[this->uniforms] = 4;
   this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
   this->userplane[i].type = BRW_REGISTER_TYPE_F;
@@ -715,6 +717,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
(gl_state_index *)slots[i].tokens);
   float *values = &this->prog->Parameters->ParameterValues[index][0].f;
 
+  assert(this->uniforms < uniform_param_count);
   this->uniform_vector_size[this->uniforms] = 0;
   /* Add each of the unique swizzled channels of the element.
* This will end up matching the size of the glsl_type of this field.
@@ -725,6 +728,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
 last_swiz = swiz;
 
 prog_data->param[this->uniforms * 4 + j] = &values[swiz];
+assert(this->uniforms < uniform_param_count);
 if (swiz <= last_swiz)
this->uniform_vector_size[this->uniforms]++;
   }
@@ -983,6 +987,7 @@ vec4_visitor::visit(ir_variable *ir)
   /* Track how big the whole uniform variable is, in case we need to put a
* copy of its data into pull constants for array access.
*/
+  assert(this->uniforms < uniform_param_count);
   this->uniform_size[this->uniforms] = type_size(ir->type);
 
   if (!strncmp(ir->name, "gl_", 3)) {
@@ -3197,6 +3202,7 @@ 
vec4_visitor::move_uniform_array_access_to_pull_constants()
 
pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
 
+   assert(uniform < uniform_param_count);
for (int j = 0; j < uniform_size[uniform] * 4; j++) {
   prog_data->pull_param[prog_data->nr_pull_params++]
   = values[j];
-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-11-27 Thread Petri Latvala
v2: Don't add function parameters, pass the required size in
prog_data->nr_params.

Signed-off-by: Petri Latvala 
---
 src/mesa/drivers/dri/i965/brw_vec4.h   | 5 +++--
 src/mesa/drivers/dri/i965/brw_vec4_gs.c| 5 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 7 +++
 src/mesa/drivers/dri/i965/brw_vs.c | 8 
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 5cec9f9..5f5f5cd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -325,8 +325,9 @@ public:
 */
dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
-   int uniform_size[MAX_UNIFORMS];
-   int uniform_vector_size[MAX_UNIFORMS];
+   int *uniform_size;
+   int *uniform_vector_size;
+   int uniform_param_count; /*< Size of uniform_[vector_]size arrays */
int uniforms;
 
src_reg shader_start_time;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
index 018b0b6..7cf9bac 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
@@ -64,6 +64,11 @@ do_gs_prog(struct brw_context *brw,
 
c.prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
c.prog_data.base.pull_param = rzalloc_array(NULL, const float *, 
param_count);
+   /* Setting nr_params here NOT to the size of the param and pull_param
+* arrays, but to the number of uniform components vec4_visitor
+* needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
+*/
+   c.prog_data.base.nr_params = param_count / 4 + gs->num_samplers;
 
if (gp->program.OutputType == GL_POINTS) {
   /* When the output type is points, the geometry shader may output data
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index a13eafb..b9226dc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -3253,6 +3253,10 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
  fail_msg(NULL),
  first_non_payload_grf(0),
  need_all_constants_in_pull_buffer(false),
+ /* Initialize uniform_param_count to at least 1 because gen6 VS requires 
at
+  * least one. See setup_uniforms() in brw_vec4.cpp.
+  */
+ uniform_param_count(prog_data->nr_params ? prog_data->nr_params : 1),
  debug_flag(debug_flag),
  no_spills(no_spills)
 {
@@ -3290,6 +3294,9 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
 
this->uniforms = 0;
+
+   this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_param_count);
+   this->uniform_vector_size = rzalloc_array(mem_ctx, int, 
this->uniform_param_count);
 }
 
 vec4_visitor::~vec4_visitor()
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
b/src/mesa/drivers/dri/i965/brw_vs.c
index b5c8b63..8d0933d 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -242,6 +242,14 @@ do_vs_prog(struct brw_context *brw,
 
prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
prog_data.base.pull_param = rzalloc_array(NULL, const float *, param_count);
+   /* Setting nr_params here NOT to the size of the param and pull_param
+* arrays, but to the number of uniform components vec4_visitor
+* needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
+*/
+   prog_data.base.nr_params = param_count / 4;
+   if (vs) {
+  prog_data.base.nr_params += vs->num_samplers;
+   }
 
GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
prog_data.inputs_read = vp->program.Base.InputsRead;
-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2] v3: Fix array overrun with too many uniforms

2013-11-27 Thread Petri Latvala
Third version of this patch series sent in full, both patches changed
a bit.

As per advice, I assigned the required size to prog_data->nr_params,
which is unused until vec4_visitor::setup_uniforms() sets it
again. This is done in do_vs_prog() and do_gs_prog(), with
accompanying comments that explain the implications.

I tested this patch with a test program and ran piglit tests, until no
regressions were found. To my understanding, the allocated size is
enough for user-defined uniforms, builtin uniforms, clip planes, and
sampler variables, with some extra which I think comes from padding
somewhere earlier. Extra means something like 3 or 7 too much, not a
factor too much.

I removed the noncopyable chunk by request too, that will possibly be
sent later in a separate patch.


Petri Latvala (2):
  i965: Allocate vec4_visitor's uniform_size and uniform_vector_size
arrays dynamically.
  i965: Assert array index on access to vec4_visitor's arrays.

 src/mesa/drivers/dri/i965/brw_vec4.cpp |  2 ++
 src/mesa/drivers/dri/i965/brw_vec4.h   |  5 +++--
 src/mesa/drivers/dri/i965/brw_vec4_gs.c|  5 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 13 +
 src/mesa/drivers/dri/i965/brw_vs.c |  8 
 5 files changed, 31 insertions(+), 2 deletions(-)

-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-11-25 Thread Petri Latvala

On 11/22/2013 08:05 PM, Francisco Jerez wrote:

Petri Latvala  writes:


[...]
@@ -325,8 +326,9 @@ public:
  */
 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
-   int uniform_size[MAX_UNIFORMS];
-   int uniform_vector_size[MAX_UNIFORMS];
+   unsigned uniform_param_count;
+   int* uniform_size;
+   int* uniform_vector_size;
 int uniforms;


All the code around you uses the 'type *identifier' convention, please
keep it consistent.


*slap on forehead* note to self, disable muscle memory. Fixing that.


 src_reg shader_start_time;
[...]
@@ -556,7 +558,7 @@ brw_gs_emit(struct brw_context *brw,
 if (likely(!(INTEL_DEBUG&  DEBUG_NO_DUAL_OBJECT_GS))) {
c->prog_data.dual_instanced_dispatch = false;

-  vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */);
+  vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */, 
param_count);

Another possibility would be to set 'c.prog_data.base.nr_params =
param_count' here and in brw_vs_emit(), so you'd have the same value
available from the constructor of vec4_visitor without all the parameter
changes.


When trying this change, I began to wonder if I'm using the right values 
at all. What is that nr_params supposed to be for code elsewhere? 
vec4_visitor::setup_uniforms() sets it to this->uniforms * 4, but 
param_count that I passed from do_vs_prog() to brw_vs_emit() is 
num_uniform_components * 4, yielding (close to) this->uniforms * 4 * 4. 
Close to means that my test program gives param_count being 92, and 
this->uniforms * 4 in setup_uniforms() is 24.


Is vec4_visitor::setup_uniforms() supposed to be the authoritative 
source of the uniform counts or is that nr_params assignment just for 
the pre-gen6 hack in the same function? Is nr_params supposed to be the 
number of uniform components or uniforms? To my understanding 
param_count in do_vs_prog() is the number of uniform components required 
to hold all uniforms with a vec4 for each one (float or otherwise), are 
the floats packed to vec4s anywhere?



--
Petri Latvala

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: Assert array index on access to vec4_visitor's arrays.

2013-11-22 Thread Petri Latvala
Signed-off-by: Petri Latvala 
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index df38dab..511b080 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -662,6 +662,7 @@ vec4_visitor::setup_uniform_values(ir_variable *ir)
storage->type->matrix_columns);
 
   for (unsigned s = 0; s < vector_count; s++) {
+ assert(uniforms < uniform_param_count);
  uniform_vector_size[uniforms] = storage->type->vector_elements;
 
  int i;
@@ -685,6 +686,7 @@ vec4_visitor::setup_uniform_clipplane_values()
gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
 
for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
+  assert(this->uniforms < uniform_param_count);
   this->uniform_vector_size[this->uniforms] = 4;
   this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
   this->userplane[i].type = BRW_REGISTER_TYPE_F;
@@ -715,6 +717,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
(gl_state_index *)slots[i].tokens);
   float *values = &this->prog->Parameters->ParameterValues[index][0].f;
 
+  assert(this->uniforms < uniform_param_count);
   this->uniform_vector_size[this->uniforms] = 0;
   /* Add each of the unique swizzled channels of the element.
* This will end up matching the size of the glsl_type of this field.
@@ -725,6 +728,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
 last_swiz = swiz;
 
 prog_data->param[this->uniforms * 4 + j] = &values[swiz];
+assert(this->uniforms < uniform_param_count);
 if (swiz <= last_swiz)
this->uniform_vector_size[this->uniforms]++;
   }
@@ -983,6 +987,7 @@ vec4_visitor::visit(ir_variable *ir)
   /* Track how big the whole uniform variable is, in case we need to put a
* copy of its data into pull constants for array access.
*/
+  assert(this->uniforms < uniform_param_count);
   this->uniform_size[this->uniforms] = type_size(ir->type);
 
   if (!strncmp(ir->name, "gl_", 3)) {
@@ -3197,6 +3202,7 @@ 
vec4_visitor::move_uniform_array_access_to_pull_constants()
 
pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
 
+   assert(uniform < uniform_param_count);
for (int j = 0; j < uniform_size[uniform] * 4; j++) {
   prog_data->pull_param[prog_data->nr_pull_params++]
   = values[j];
-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2] v2: Fix array overrun with too many uniforms

2013-11-22 Thread Petri Latvala
Second attempt at fixing
https://bugs.freedesktop.org/show_bug.cgi?id=71254

vec4_visitor::uniform_size and vec4_visitor::uniform_vector_size are
now allocated dynamically based on param_count from do_vs_prog.

I also made vec4_visitor noncopyable. It is not copied currently, and
to my understanding copying it would be nonsense anyway.

The passed param_count is saved in vec4_visitor for the asserts in
patch 2. That could be wrapped in #ifndef NDEBUG but I wasn't sure
whether that is desired. It would change the ABI for that class
depending on compile flags, but the only use for it is internal.


Petri Latvala (2):
  i965: Allocate vec4_visitor's uniform_size and uniform_vector_size
arrays dynamically.
  i965: Assert array index on access to vec4_visitor's arrays.

 src/mesa/drivers/dri/i965/brw_vec4.cpp|  5 +++--
 src/mesa/drivers/dri/i965/brw_vec4.h  | 14 +++---
 src/mesa/drivers/dri/i965/brw_vec4_gs.c   |  2 +-
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 12 +++-
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h   |  6 --
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp| 13 -
 src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp |  6 --
 src/mesa/drivers/dri/i965/brw_vs.c|  2 +-
 src/mesa/drivers/dri/i965/brw_vs.h|  6 --
 9 files changed, 47 insertions(+), 19 deletions(-)

-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-11-22 Thread Petri Latvala
Signed-off-by: Petri Latvala 
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp|  5 +++--
 src/mesa/drivers/dri/i965/brw_vec4.h  | 14 +++---
 src/mesa/drivers/dri/i965/brw_vec4_gs.c   |  2 +-
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 12 +++-
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h   |  6 --
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp|  7 ++-
 src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp |  6 --
 src/mesa/drivers/dri/i965/brw_vs.c|  2 +-
 src/mesa/drivers/dri/i965/brw_vs.h|  6 --
 9 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 73f91a0..3da1b4d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1558,7 +1558,8 @@ brw_vs_emit(struct brw_context *brw,
 struct brw_vs_compile *c,
 struct brw_vs_prog_data *prog_data,
 void *mem_ctx,
-unsigned *final_assembly_size)
+unsigned *final_assembly_size,
+unsigned param_count)
 {
bool start_busy = false;
float start_time = 0;
@@ -1585,7 +1586,7 @@ brw_vs_emit(struct brw_context *brw,
   }
}
 
-   vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx);
+   vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx, param_count);
if (!v.run()) {
   if (prog) {
  prog->LinkStatus = false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 5cec9f9..552ca55 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -231,7 +231,8 @@ public:
struct brw_shader *shader,
void *mem_ctx,
 bool debug_flag,
-bool no_spills);
+bool no_spills,
+unsigned param_count);
~vec4_visitor();
 
dst_reg dst_null_f()
@@ -325,8 +326,9 @@ public:
 */
dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
-   int uniform_size[MAX_UNIFORMS];
-   int uniform_vector_size[MAX_UNIFORMS];
+   unsigned uniform_param_count;
+   int* uniform_size;
+   int* uniform_vector_size;
int uniforms;
 
src_reg shader_start_time;
@@ -546,6 +548,12 @@ private:
 * If true, then register allocation should fail instead of spilling.
 */
const bool no_spills;
+
+   /*
+* Make noncopyable.
+*/
+   vec4_visitor(const vec4_visitor&);
+   vec4_visitor& operator=(const vec4_visitor&);
 };
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
index b52d646..b0b77d1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
@@ -213,7 +213,7 @@ do_gs_prog(struct brw_context *brw,
void *mem_ctx = ralloc_context(NULL);
unsigned program_size;
const unsigned *program =
-  brw_gs_emit(brw, prog, &c, mem_ctx, &program_size);
+  brw_gs_emit(brw, prog, &c, mem_ctx, &program_size, param_count);
if (program == NULL) {
   ralloc_free(mem_ctx);
   return false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index adbb1cf..16c05f5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -38,10 +38,11 @@ vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw,
  struct gl_shader_program *prog,
  struct brw_shader *shader,
  void *mem_ctx,
- bool no_spills)
+ bool no_spills,
+ unsigned param_count)
: vec4_visitor(brw, &c->base, &c->gp->program.Base, &c->key.base,
   &c->prog_data.base, prog, shader, mem_ctx,
-  INTEL_DEBUG & DEBUG_GS, no_spills),
+  INTEL_DEBUG & DEBUG_GS, no_spills, param_count),
  c(c)
 {
 }
@@ -539,7 +540,8 @@ brw_gs_emit(struct brw_context *brw,
 struct gl_shader_program *prog,
 struct brw_gs_compile *c,
 void *mem_ctx,
-unsigned *final_assembly_size)
+unsigned *final_assembly_size,
+unsigned param_count)
 {
struct brw_shader *shader =
   (brw_shader *) prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
@@ -556,7 +558,7 @@ brw_gs_emit(struct brw_context *brw,
if (likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
   c->prog_data.dual_instanced_dispatch = false;
 
-  vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */);
+  vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */, 

[Mesa-dev] [PATCH 1/2] Increase array sizes to what they should be.

2013-11-08 Thread Petri Latvala
vec4_visitor's uniform_size and uniform_vector_size arrays contain
information about uniforms. Their size should be the number of uniform
components (MAX_UNIFORMS * 4) instead of number of uniform vec4s
(MAX_UNIFORMS).

Signed-off-by: Petri Latvala 
---
 src/mesa/drivers/dri/i965/brw_vec4.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 1f29e57..5b7d4b6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -325,8 +325,8 @@ public:
 */
dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
-   int uniform_size[MAX_UNIFORMS];
-   int uniform_vector_size[MAX_UNIFORMS];
+   int uniform_size[MAX_UNIFORMS * 4];
+   int uniform_vector_size[MAX_UNIFORMS * 4];
int uniforms;
 
src_reg shader_start_time;
-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2] Fix array overrun on too many uniforms

2013-11-08 Thread Petri Latvala
https://bugs.freedesktop.org/show_bug.cgi?id=71254

vec4_visitor has two arrays, uniform_size and
uniform_vector_size. Their size is set to MAX_UNIFORMS, which is the
number of uniform vec4s. The size should be the number of uniform
components instead.

When testing this fix (testcase shader attached in the bug), you'll
need to set the stack limit to bigger than the default 8MB. 16MB was
enough for the testcase shader. I verified that with this fix correct
error message is given with 17000 uniforms, and compiling a shader
with 15000 uniforms works, although it took 5 minutes to compile.

Petri Latvala (2):
  Increase array sizes to what they should be.
  Check access to vec4_visitor's arrays

 src/mesa/drivers/dri/i965/brw_vec4.cpp | 3 +++
 src/mesa/drivers/dri/i965/brw_vec4.h   | 4 ++--
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 ++
 src/mesa/drivers/dri/i965/brw_vec4_vp.cpp  | 1 +
 4 files changed, 12 insertions(+), 2 deletions(-)

-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] Check access to vec4_visitor's arrays

2013-11-08 Thread Petri Latvala
Protect access to vec4_visitor's uniform_size and uniform_vector_size arrays
by asserting the index used.

Signed-off-by: Petri Latvala 
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 3 +++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 ++
 src/mesa/drivers/dri/i965/brw_vec4_vp.cpp  | 1 +
 3 files changed, 10 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 20fbd45..8912198 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -372,6 +372,7 @@ vec4_visitor::split_uniform_registers()
   }
}
 
+   assert(this->uniforms < MAX_UNIFORMS * 4);
/* Update that everything is now vector-sized. */
for (int i = 0; i < this->uniforms; i++) {
   this->uniform_size[i] = 1;
@@ -409,6 +410,7 @@ vec4_visitor::pack_uniform_registers()
/* Now, figure out a packing of the live uniform vectors into our
 * push constants.
 */
+   assert(uniforms < MAX_UNIFORMS * 4);
for (int src = 0; src < uniforms; src++) {
   int size = this->uniform_vector_size[src];
 
@@ -445,6 +447,7 @@ vec4_visitor::pack_uniform_registers()
}
 
this->uniforms = new_uniform_count;
+   assert(this->uniforms < MAX_UNIFORMS * 4);
 
/* Now, update the instructions for our repacked uniforms. */
foreach_list(node, &this->instructions) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index a036e2d..46fd8dc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -663,6 +663,7 @@ vec4_visitor::setup_uniform_values(ir_variable *ir)
storage->type->matrix_columns);
 
   for (unsigned s = 0; s < vector_count; s++) {
+ assert(uniforms < MAX_UNIFORMS * 4);
  uniform_vector_size[uniforms] = storage->type->vector_elements;
 
  int i;
@@ -686,6 +687,7 @@ vec4_visitor::setup_uniform_clipplane_values()
gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
 
for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
+  assert(this->uniforms < MAX_UNIFORMS * 4);
   this->uniform_vector_size[this->uniforms] = 4;
   this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
   this->userplane[i].type = BRW_REGISTER_TYPE_F;
@@ -716,6 +718,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
(gl_state_index *)slots[i].tokens);
   float *values = &this->prog->Parameters->ParameterValues[index][0].f;
 
+  assert(this->uniforms < MAX_UNIFORMS * 4);
   this->uniform_vector_size[this->uniforms] = 0;
   /* Add each of the unique swizzled channels of the element.
* This will end up matching the size of the glsl_type of this field.
@@ -726,6 +729,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
 last_swiz = swiz;
 
 prog_data->param[this->uniforms * 4 + j] = &values[swiz];
+assert(this->uniforms < MAX_UNIFORMS * 4);
 if (swiz <= last_swiz)
this->uniform_vector_size[this->uniforms]++;
   }
@@ -984,6 +988,7 @@ vec4_visitor::visit(ir_variable *ir)
   /* Track how big the whole uniform variable is, in case we need to put a
* copy of its data into pull constants for array access.
*/
+  assert(this->uniforms < MAX_UNIFORMS * 4);
   this->uniform_size[this->uniforms] = type_size(ir->type);
 
   if (!strncmp(ir->name, "gl_", 3)) {
@@ -3198,6 +3203,7 @@ 
vec4_visitor::move_uniform_array_access_to_pull_constants()
 
pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
 
+   assert(uniform < MAX_UNIFORMS * 4);
for (int j = 0; j < uniform_size[uniform] * 4; j++) {
   prog_data->pull_param[prog_data->nr_pull_params++]
   = values[j];
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
index 1f3d75c..bb31e93 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
@@ -443,6 +443,7 @@ vec4_vs_visitor::setup_vp_regs()
*/
   assert(components <= 4);
 
+  assert(this->uniforms < MAX_UNIFORMS * 4);
   this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
   this->uniform_vector_size[this->uniforms] = components;
   for (unsigned i = 0; i < 4; i++) {
-- 
1.8.4.rc3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev