[Mesa-dev] [Bug 82538] Super Maryo Chronicles fails with st/mesa assertion failure

2014-08-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=82538

--- Comment #2 from Michel Dänzer mic...@daenzer.net ---
(In reply to comment #1)
  It works fine for me on Kabini :). Mesa git
 d7d8260f70326cd294715203dae8a8f0150680c1, llvm 3.5-rc2,

I can still reproduce it with current Mesa Git. Does your Mesa build have
assertions enabled?


 smc as Debian package in Sid.

Same here, currently version 1.9+git20121121-1.1.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] clover: fix _logs string creation

2014-08-18 Thread Francisco Jerez
EdB edb+m...@sigluy.net writes:

 compat::string is not \0 terminated.
 size() needs to be used for std::string creation
 ---
  src/gallium/state_trackers/clover/core/program.cpp | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

 diff --git a/src/gallium/state_trackers/clover/core/program.cpp 
 b/src/gallium/state_trackers/clover/core/program.cpp
 index e09c3aa..3f504d5 100644
 --- a/src/gallium/state_trackers/clover/core/program.cpp
 +++ b/src/gallium/state_trackers/clover/core/program.cpp
 @@ -61,9 +61,9 @@ program::build(const ref_vectordevice devs, const char 
 *opts) {
  dev.ir_target(), 
 build_opts(dev),
  log));
  _binaries.insert({ dev, module });
 -_logs.insert({ dev, std::string(log.c_str()) });
 +_logs.insert({ dev, std::string(log.c_str(), log.size()) });
   } catch (const build_error ) {
 -_logs.insert({ dev, std::string(log.c_str()) });
 +_logs.insert({ dev, std::string(log.c_str(), log.size()) });

Both of these should just be using the conversion operator.  See
attachment.

  throw;
   }
}
 -- 
 2.0.4

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

From 3c2bec6d790e6aa38fb6d71cd495f281205ddf6c Mon Sep 17 00:00:00 2001
From: Francisco Jerez curroje...@riseup.net
Date: Mon, 18 Aug 2014 09:05:25 +0300
Subject: [PATCH] clover: Use conversion operator to initialize build log from
 compat::string.

Fixes binary garbage in the compilation logs caused by
compat::string::c_str() not being null-terminated (which is a bug on
its own that will be fixed in another commit).

Reported-by: EdB edb+m...@sigluy.net
---
 src/gallium/state_trackers/clover/core/program.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp
index 30a1f0e..6c224db 100644
--- a/src/gallium/state_trackers/clover/core/program.cpp
+++ b/src/gallium/state_trackers/clover/core/program.cpp
@@ -61,9 +61,9 @@ program::build(const ref_vectordevice devs, const char *opts) {
 dev.ir_target(), build_opts(dev),
 log));
 _binaries.insert({ dev, module });
-_logs.insert({ dev, std::string(log.c_str()) });
+_logs.insert({ dev, log });
  } catch (const build_error ) {
-_logs.insert({ dev, std::string(log.c_str()) });
+_logs.insert({ dev, log });
 throw;
  }
   }
-- 
2.0.4



pgpejMi7uG3oD.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] clover: stdify compat::vector a little more

2014-08-18 Thread Francisco Jerez
EdB edb+m...@sigluy.net writes:

 make resize work like std::vector
 reserve take advantage of capacity
 rename members to be uniform with other class
 ---
  src/gallium/state_trackers/clover/core/module.cpp |   2 +-
  src/gallium/state_trackers/clover/util/compat.hpp | 113 
 +++---
  2 files changed, 78 insertions(+), 37 deletions(-)


This could be a *lot* simpler, see attachment.

From abd573bffb674a0a7565b18b38be116472fa5f24 Mon Sep 17 00:00:00 2001
From: Francisco Jerez curroje...@riseup.net
Date: Mon, 18 Aug 2014 08:30:46 +0300
Subject: [PATCH] clover/util: Have compat::vector track separate size and
 capacity.

In order to make the behaviour of resize() and reserve() closer to the
standard.
---
 src/gallium/state_trackers/clover/core/module.cpp |  4 +-
 src/gallium/state_trackers/clover/util/compat.hpp | 67 ++-
 2 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/src/gallium/state_trackers/clover/core/module.cpp b/src/gallium/state_trackers/clover/core/module.cpp
index 55ed91a..9ef584b 100644
--- a/src/gallium/state_trackers/clover/core/module.cpp
+++ b/src/gallium/state_trackers/clover/core/module.cpp
@@ -94,7 +94,7 @@ namespace {
 
   static void
   proc(compat::istream is, compat::vectorT v) {
- v.reserve(_procuint32_t(is));
+ v.resize(_procuint32_t(is));
 
  for (size_t i = 0; i  v.size(); i++)
 new(v[i]) T(_procT(is));
@@ -122,7 +122,7 @@ namespace {
 
   static void
   proc(compat::istream is, compat::vectorT v) {
- v.reserve(_procuint32_t(is));
+ v.resize(_procuint32_t(is));
  is.read(reinterpret_castchar *(v.begin()),
  v.size() * sizeof(T));
   }
diff --git a/src/gallium/state_trackers/clover/util/compat.hpp b/src/gallium/state_trackers/clover/util/compat.hpp
index 50e1c7d..a4e3938 100644
--- a/src/gallium/state_trackers/clover/util/compat.hpp
+++ b/src/gallium/state_trackers/clover/util/compat.hpp
@@ -66,65 +66,81 @@ namespace clover {
  typedef std::ptrdiff_t difference_type;
  typedef std::size_t size_type;
 
- vector() : p(NULL), n(0) {
+ vector() : p(NULL), _size(0), _capacity(0) {
  }
 
- vector(const vector v) : p(alloc(v.n, v.p, v.n)), n(v.n) {
+ vector(const vector v) :
+p(alloc(v._size, v.p, v._size)),
+_size(v._size), _capacity(v._size) {
  }
 
- vector(const_iterator p, size_type n) : p(alloc(n, p, n)), n(n) {
+ vector(const_iterator p, size_type n) :
+p(alloc(n, p, n)), _size(n), _capacity(n) {
  }
 
  templatetypename C
  vector(const C v) :
-p(alloc(v.size(), *v.begin(), v.size())), n(v.size()) {
+p(alloc(v.size(), *v.begin(), v.size())),
+_size(v.size()) , _capacity(v.size()) {
  }
 
  ~vector() {
-free(n, p);
+free(_size, p);
  }
 
  vector 
  operator=(const vector v) {
-free(n, p);
+free(_size, p);
 
-p = alloc(v.n, v.p, v.n);
-n = v.n;
+p = alloc(v._size, v.p, v._size);
+_size = v._size;
+_capacity = v._size;
 
 return *this;
  }
 
  void
- reserve(size_type m) {
-if (n  m) {
-   T *q = alloc(m, p, n);
-   free(n, p);
+ reserve(size_type n) {
+if (_capacity  n) {
+   T *q = alloc(n, p, _size);
+   free(_size, p);
 
p = q;
-   n = m;
+   _capacity = n;
 }
  }
 
  void
- resize(size_type m, T x = T()) {
-size_type n = size();
+ resize(size_type n, T x = T()) {
+if (n = _size) {
+   for (size_type i = n; i  _size; ++i)
+  p[i].~T();
 
-reserve(m);
+} else {
+   reserve(n);
 
-for (size_type i = n; i  m; ++i)
-   new(p[i]) T(x);
+   for (size_type i = _size; i  n; ++i)
+  new(p[i]) T(x);
+}
+
+_size = n;
  }
 
  void
  push_back(const T x) {
-size_type n = size();
-reserve(n + 1);
-new(p[n]) T(x);
+reserve(_size + 1);
+new(p[_size]) T(x);
+++_size;
  }
 
  size_type
  size() const {
-return n;
+return _size;
+ }
+
+ size_type
+ capacity() const {
+return _capacity;
  }
 
  iterator
@@ -139,12 +155,12 @@ namespace clover {
 
  iterator
  end() {
-return p + n;
+return p + _size;
  }
 
  const_iterator
  end() const {
-return p + n;
+return p + _size;
  }
 
  

Re: [Mesa-dev] [PATCH] clover: fix piglit cl-api-build-program test

2014-08-18 Thread Francisco Jerez
EdB edb+m...@sigluy.net writes:

 On Sunday, August 17, 2014 11:50:12 PM Francisco Jerez wrote:
 EdB edb+m...@sigluy.net writes:
  Hello
  
  There is a crash with your version.
  This one works
 
 Oops, sorry for that.  It seems like a hack to me to force the kernel
 reference count to one to keep it from being destroyed...  Can you try
 the attached patch instead on top of my clover-next branch [1]?
 8010325eaf and 47e8adea3a are the ones it depends on.
 
 [1] http://cgit.freedesktop.org/~currojerez/mesa/log/?h=clover-next

 It works

 Thanks

Cool, pushed.


pgpD_NQunJHEv.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 82538] Super Maryo Chronicles fails with st/mesa assertion failure

2014-08-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=82538

Michel Dänzer mic...@daenzer.net changed:

   What|Removed |Added

 CC||mar...@gmail.com

--- Comment #3 from Michel Dänzer mic...@daenzer.net ---
Bisected it to:

commit 734e4946f50c1b83dafdb18ced652abc88e6a246
Author: Marek Olšák marek.ol...@amd.com
Date:   Fri Jul 11 00:05:44 2014 +0200

mesa: fix crash in st/mesa after deleting a VAO

This happens when glGetMultisamplefv (or any other non-draw function) is
called, which doesn't invoke the VBO module to update _DrawArrays and
the pointer is invalid at that point.

However st/mesa still dereferences it to setup vertex buffers == crash.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa

2014-08-18 Thread Michel Dänzer
On 16.08.2014 09:12, Connor Abbott wrote:
 I know what you might be thinking right now. Wait, *another* IR? Don't
 we already have like 5 of those, not counting all the driver-specific
 ones? Isn't this stuff complicated enough already? Well, there are some
 pretty good reasons to start afresh (again...). In the years we've been
 using GLSL IR, we've come to realize that, in fact, it's not what we
 want *at all* to do optimizations on.

Did you evaluate using LLVM IR instead of inventing yet another one?


-- 
Earthling Michel Dänzer|  http://www.amd.com
Libre software enthusiast  |Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] clover: fix _logs string creation

2014-08-18 Thread EdB
On Monday, August 18, 2014 09:20:03 AM Francisco Jerez wrote:
 EdB edb+m...@sigluy.net writes:
  compat::string is not \0 terminated.
  size() needs to be used for std::string creation
  ---
  
   src/gallium/state_trackers/clover/core/program.cpp | 4 ++--
   1 file changed, 2 insertions(+), 2 deletions(-)
  
  diff --git a/src/gallium/state_trackers/clover/core/program.cpp
  b/src/gallium/state_trackers/clover/core/program.cpp index
  e09c3aa..3f504d5 100644
  --- a/src/gallium/state_trackers/clover/core/program.cpp
  +++ b/src/gallium/state_trackers/clover/core/program.cpp
  @@ -61,9 +61,9 @@ program::build(const ref_vectordevice devs, const
  char *opts) { 
   dev.ir_target(),
   build_opts(dev),
   log));
   
   _binaries.insert({ dev, module });
  
  -_logs.insert({ dev, std::string(log.c_str()) });
  +_logs.insert({ dev, std::string(log.c_str(), log.size()) });
  
} catch (const build_error ) {
  
  -_logs.insert({ dev, std::string(log.c_str()) });
  +_logs.insert({ dev, std::string(log.c_str(), log.size()) });
 
 Both of these should just be using the conversion operator.  See
 attachment.

Agreed, I was highlighting the problem.
Yours is better.

Thanks

 
   throw;

}
 
 }
  
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] clover: stdify compat::vector a little more

2014-08-18 Thread EdB
On Monday, August 18, 2014 09:29:02 AM Francisco Jerez wrote:
 EdB edb+m...@sigluy.net writes:
  make resize work like std::vector
  reserve take advantage of capacity
  rename members to be uniform with other class
  ---
  
   src/gallium/state_trackers/clover/core/module.cpp |   2 +-
   src/gallium/state_trackers/clover/util/compat.hpp | 113
   +++--- 2 files changed, 78 insertions(+), 37
   deletions(-)
 
 This could be a *lot* simpler, see attachment.

Looks good to me.

Thanks
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/7] mapi: Inline shared-glapi/Makefile.

2014-08-18 Thread Emil Velikov
On 18/08/14 05:14, Matt Turner wrote:
 On Sun, Aug 17, 2014 at 1:06 PM, Kristian Høgsberg hoegsb...@gmail.com 
 wrote:
 On Fri, Aug 15, 2014 at 10:47:06AM -0700, Matt Turner wrote:
 ---
  configure.ac  |  1 -
  src/mapi/Makefile.am  | 44 
 ---
  src/mapi/shared-glapi/Makefile.am | 34 --
  src/mesa/Makefile.sources |  3 ---
  4 files changed, 41 insertions(+), 41 deletions(-)
  delete mode 100644 src/mapi/shared-glapi/Makefile.am

 diff --git a/configure.ac b/configure.ac
 index dc81c80..97d5394 100644
 --- a/configure.ac
 +++ b/configure.ac
 @@ -2243,7 +2243,6 @@ AC_CONFIG_FILES([Makefile
   src/mapi/glapi/Makefile
   src/mapi/glapi/gen/Makefile
   src/mapi/glapi/tests/Makefile
 - src/mapi/shared-glapi/Makefile
   src/mapi/shared-glapi/tests/Makefile
   src/mapi/vgapi/Makefile
   src/mapi/vgapi/vg.pc
 diff --git a/src/mapi/Makefile.am b/src/mapi/Makefile.am
 index ef53803..6b9444a 100644
 --- a/src/mapi/Makefile.am
 +++ b/src/mapi/Makefile.am
 @@ -1,4 +1,4 @@
 -# Copyright © 2013 Intel Corporation
 +# Copyright © 2013, 2014 Intel Corporation
  #
  # Permission is hereby granted, free of charge, to any person obtaining a
  # copy of this software and associated documentation files (the 
 Software),
 @@ -19,10 +19,46 @@
  # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 DEALINGS
  # IN THE SOFTWARE.

 -SUBDIRS = glapi/gen
 +SUBDIRS = glapi/gen .
 +
 +TOP = $(top_srcdir)
 +
 +BUILT_SOURCES =
 +CLEANFILES = $(BUILT_SOURCES)
 +
 +lib_LTLIBRARIES =
 +
 +AM_CFLAGS = $(PTHREAD_CFLAGS)
 +AM_CPPFLAGS =\
 + $(DEFINES)  \
 + $(SELINUX_CFLAGS)   \
 + -I$(top_srcdir)/include \
 + -I$(top_srcdir)/src/mapi\
 + -I$(top_builddir)/src/mapi
 +
 +GLAPI = $(top_srcdir)/src/mapi/glapi
 +include Makefile.sources
 +include glapi/gen/glapi_gen.mk

  if HAVE_SHARED_GLAPI
 -SUBDIRS += shared-glapi
 +SUBDIRS += shared-glapi/tests
 +
 +BUILT_SOURCES += shared-glapi/glapi_mapi_tmp.h
 +
 +lib_LTLIBRARIES += shared-glapi/libglapi.la
 +shared_glapi_libglapi_la_SOURCES = $(MAPI_GLAPI_FILES)
 +shared_glapi_libglapi_la_CPPFLAGS = \
 + $(AM_CPPFLAGS) \
 + -DMAPI_MODE_GLAPI \
 + -DMAPI_ABI_HEADER=\shared-glapi/glapi_mapi_tmp.h\
 +shared_glapi_libglapi_la_LIBADD = $(SELINUX_LIBS)
 +shared_glapi_libglapi_la_LDFLAGS = \
 + -no-undefined \
 + $(GC_SECTIONS) \
 + $(LD_NO_UNDEFINED)
 +
 +shared-glapi/glapi_mapi_tmp.h : $(GLAPI)/gen/gl_and_es_API.xml 
 $(glapi_gen_mapi_deps)
 + $(call glapi_gen_mapi,$,shared-glapi)
  endif

  if HAVE_OPENGL
 @@ -40,3 +76,5 @@ endif
  if HAVE_OPENVG
  SUBDIRS += vgapi
  endif
 +
 +include $(top_srcdir)/install-lib-links.mk
 diff --git a/src/mapi/shared-glapi/Makefile.am 
 b/src/mapi/shared-glapi/Makefile.am
 deleted file mode 100644
 index 330719c..000
 --- a/src/mapi/shared-glapi/Makefile.am
 +++ /dev/null
 @@ -1,34 +0,0 @@
 -# Used by OpenGL ES or when --enable-shared-glapi is specified
 -
 -SUBDIRS = . tests
 -
 -TOP = $(top_srcdir)
 -GLAPI = $(top_srcdir)/src/mapi/glapi
 -include $(top_srcdir)/src/mapi/Makefile.sources
 -
 -lib_LTLIBRARIES = libglapi.la
 -libglapi_la_SOURCES = $(MAPI_GLAPI_FILES)
 -libglapi_la_LIBADD = $(PTHREAD_LIBS) $(SELINUX_LIBS)

 You didn't move $(PTHREAD_LIBS) up to shared_glapi_libglapi_la_LIBADD?
 
 Right... Emil, do you remember whether PTHREAD_LIBS is needed?
 PTHREAD_CFLAGS seems sufficient for me, but I have a vague memory that
 FreeBSD or something needs PTHREAD_LIBS.
 
This seems to be an interesting topic:

ldd states that our current pthreads linking is not needed. On the other hand,
libglapi.so.0.0 has at least one function (pthread_once) coming from the
pthreads library. At the same time, that function is _unused_ by the whole of
Mesa. Not to mention that *BSD people need the pthreads linking, as their libc
does not provide any pthread* symbols.

So in summary, let's keep PTHREAD_LIBS in for now :)

-Emil

 -libglapi_la_LDFLAGS = \
 - -no-undefined \
 - $(GC_SECTIONS) \
 - $(LD_NO_UNDEFINED)
 -
 -include $(GLAPI)/gen/glapi_gen.mk
 -glapi_mapi_tmp.h : $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
 - $(call glapi_gen_mapi,$,shared-glapi)
 -
 -BUILT_SOURCES = glapi_mapi_tmp.h
 -CLEANFILES = $(BUILT_SOURCES)
 -
 -AM_CFLAGS = $(PTHREAD_CFLAGS)
 -AM_CPPFLAGS =\
 - $(DEFINES)  \
 - $(SELINUX_CFLAGS)   \
 - -I$(top_srcdir)/include \
 - -I$(top_srcdir)/src/mapi\
 - -I$(top_builddir)/src/mapi

Re: [Mesa-dev] [PATCH 03/19] glx/drisw: add support for DRI2rendererQueryExtension

2014-08-18 Thread Jon TURNEY

On 14/08/2014 23:18, Emil Velikov wrote:

The extension is used by GLX_MESA_query_renderer, which
can be provided for by hardware and software drivers.

v2: Use designated initializers.
v3: Move drisw_query_renderer_*() to dri2_query_renderer.c


This breaks my build (see [1])

I guess something like the attached is needed.

Possibly dri2_query_renderer.c needs to be renamed, since its contents
are now used for more than dri[23].


[1] http://tinderbox.x.org/builds/2014-08-16-0006/logs/mesa-mesa/#build

From ee9b2d044ebb089bc3daf93fc6b71e167c47841f Mon Sep 17 00:00:00 2001
From: Jon TURNEY jon.tur...@dronecode.org.uk
Date: Sun, 17 Aug 2014 17:22:22 +0100
Subject: [PATCH] Fix build since 679c2ef glx/drisw: add support for
 DRI2rendererQueryExtension, when only building drisw renderer.

Signed-off-by: Jon TURNEY jon.tur...@dronecode.org.uk
---
 src/glx/Makefile.am   | 6 +++---
 src/glx/dri2_query_renderer.c | 4 
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am
index cdd898e..23cb794 100644
--- a/src/glx/Makefile.am
+++ b/src/glx/Makefile.am
@@ -96,7 +96,8 @@ endif
 if HAVE_DRICOMMON
 libglx_la_SOURCES += \
  xfont.c \
- dri_common.c
+ dri_common.c \
+ dri2_query_renderer.c
 endif
 
 if HAVE_DRI2
@@ -104,8 +105,7 @@ libglx_la_SOURCES += \
  dri_glx.c \
  XF86dri.c \
  dri2_glx.c \
- dri2.c \
- dri2_query_renderer.c
+ dri2.c
 endif
 
 if HAVE_DRI3
diff --git a/src/glx/dri2_query_renderer.c b/src/glx/dri2_query_renderer.c
index 247ec1c..6ccd710 100644
--- a/src/glx/dri2_query_renderer.c
+++ b/src/glx/dri2_query_renderer.c
@@ -25,7 +25,9 @@
 
 #include glxclient.h
 #include glx_error.h
+#ifdef HAVE_LIBDRM
 #include dri2.h
+#endif
 #include dri_interface.h
 #include dri2_priv.h
 #if defined(HAVE_DRI3)
@@ -66,6 +68,7 @@ dri2_convert_glx_query_renderer_attribs(int attribute)
return -1;
 }
 
+#ifdef HAVE_LIBDRM
 _X_HIDDEN int
 dri2_query_renderer_integer(struct glx_screen *base, int attribute,
 unsigned int *value)
@@ -103,6 +106,7 @@ dri2_query_renderer_string(struct glx_screen *base, int 
attribute,
 
return psc-rendererQuery-queryString(psc-driScreen, dri_attribute, 
value);
 }
+#endif
 
 #if defined(HAVE_DRI3)
 _X_HIDDEN int
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] build: Let install-lib-links.mk handle .la files in subdirectories.

2014-08-18 Thread Emil Velikov
On 18/08/14 05:19, Matt Turner wrote:
 On Sun, Aug 17, 2014 at 2:39 PM, Emil Velikov emil.l.veli...@gmail.com 
 wrote:
 On 15/08/14 18:47, Matt Turner wrote:
 The next patches are going to combine some of the mapi subdirectories'
 Makefiles into a single Makefile, giving better build parallelism.

 Hi Matt,

 I must admit that while I like this patch, I'm not at all a fan of the rest 
 of
 the series. But I won't object too strongly against the idea.
 
 Oh, really? I mean, there's some complexity just in all of the
 combinations, but I think this is a clean up.
 
 It's certainly an improvement in that we don't have Makefiles that
 build a single source file. After this series if you build GL, ES1,
 and ES2 all of it happens in parallel including the tests.
 
I shall not be going into mapi anytime soon, so it's up to you to have fun in
there. I prefer to get gallium's 'make dist' close to working and clean up
some of the pipe-loader/targets mess that I've created :P

Not sure if the extra parallelism will help here as I very rarely build ES*
anyway so ;)

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/19] glx/drisw: add support for DRI2rendererQueryExtension

2014-08-18 Thread Emil Velikov
On 18/08/14 12:47, Jon TURNEY wrote:
 On 14/08/2014 23:18, Emil Velikov wrote:
 The extension is used by GLX_MESA_query_renderer, which
 can be provided for by hardware and software drivers.

 v2: Use designated initializers.
 v3: Move drisw_query_renderer_*() to dri2_query_renderer.c
 
 This breaks my build (see [1])
 
Ouch, I've completely forgotten about your recent-ish changes in here. Sorry for
the breakage.

 I guess something like the attached is needed.
 
 Possibly dri2_query_renderer.c needs to be renamed, since it's contents now
 are used for more than dri[23].
 
My initial plan was to move the functions to dri_common.c, although that
caused 'make check' to explode so I've kept them here as per Ian's suggestion.
Renaming the file makes sense imho.

 [1] http://tinderbox.x.org/builds/2014-08-16-0006/logs/mesa-mesa/#build
 
 
 0001-Fix-build-since-679c2ef-glx-drisw-add-support-for-DR.patch
 
 
 From ee9b2d044ebb089bc3daf93fc6b71e167c47841f Mon Sep 17 00:00:00 2001
 From: Jon TURNEY jon.tur...@dronecode.org.uk
 Date: Sun, 17 Aug 2014 17:22:22 +0100
 Subject: [PATCH] Fix build since 679c2ef glx/drisw: add support for
  DRI2rendererQueryExtension, when only building drisw renderer.
 
 Signed-off-by: Jon TURNEY jon.tur...@dronecode.org.uk
 ---
  src/glx/Makefile.am   | 6 +++---
  src/glx/dri2_query_renderer.c | 4 
  2 files changed, 7 insertions(+), 3 deletions(-)
 
 diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am
 index cdd898e..23cb794 100644
 --- a/src/glx/Makefile.am
 +++ b/src/glx/Makefile.am
 @@ -96,7 +96,8 @@ endif
  if HAVE_DRICOMMON
  libglx_la_SOURCES += \
 xfont.c \
 -   dri_common.c
 +   dri_common.c \
 +   dri2_query_renderer.c
  endif
  
  if HAVE_DRI2
 @@ -104,8 +105,7 @@ libglx_la_SOURCES += \
 dri_glx.c \
 XF86dri.c \
 dri2_glx.c \
 -   dri2.c \
 -   dri2_query_renderer.c
 +   dri2.c
  endif
  
  if HAVE_DRI3
 diff --git a/src/glx/dri2_query_renderer.c b/src/glx/dri2_query_renderer.c
 index 247ec1c..6ccd710 100644
 --- a/src/glx/dri2_query_renderer.c
 +++ b/src/glx/dri2_query_renderer.c
 @@ -25,7 +25,9 @@
  
  #include glxclient.h
  #include glx_error.h
 +#ifdef HAVE_LIBDRM
  #include dri2.h
 +#endif
With a couple of small changes, I believe that you should be safe with
dropping the above header and the HAVE_LIBDRM guards below.

The small changes:
 - dri*_query_renderer_* into their respective dri*_priv.h
 - Perhaps move a struct from dri2.h to dri2_priv.h

-Emil

  #include dri_interface.h
  #include dri2_priv.h
  #if defined(HAVE_DRI3)
 @@ -66,6 +68,7 @@ dri2_convert_glx_query_renderer_attribs(int attribute)
 return -1;
  }
  
 +#ifdef HAVE_LIBDRM
  _X_HIDDEN int
  dri2_query_renderer_integer(struct glx_screen *base, int attribute,
  unsigned int *value)
 @@ -103,6 +106,7 @@ dri2_query_renderer_string(struct glx_screen *base, int 
 attribute,
  
 return psc-rendererQuery-queryString(psc-driScreen, dri_attribute, 
 value);
  }
 +#endif
  
  #if defined(HAVE_DRI3)
  _X_HIDDEN int
 -- 1.8.5.5
 
 
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Clamp/saturate optimizations v3

2014-08-18 Thread Abdiel Janulgue
v3 of clamp and saturate optimizations

Changes since v1: 
 - Only remove the old try_emit_saturate operations after the new optimizations 
are
   in place. (Matt, Ian)
 - Output [min/max](saturate(x),b) instead of saturate([min/max](x,b)) as 
suggested
   by Ilia Mirkin.
 - The change above required some refactoring in the fs/vec4 backend to allow
   propagation of certain instructions with saturate flag to SEL. For other 
instructions,
   we don't propagate saturate instructions, similar to the previous behaviour.
Since v2:
 - Fix comments to reflect we are doing a commutative operation, add missing 
conditions
   when optimizing clamp in opt_algebraic pass.
 - Refactor try_emit_saturate() in i965/fs instead of completely removing it.
   This fixed a regression where the changes emitted an (extra) unnecessary
   saturated mov when the expression generating src can do saturate directly
   instead.
 - Fix regression in the i965/vec4 copy-propagate optimization caused by 
ignoring 
   channels in the propagated instruction.
 - Count generated loops from the fs/vec4 generator.

Results from our shader-db:

total instructions in shared programs: 4538627 -> 4560104 (0.47%)
instructions in affected programs: 45144 -> 66621 (47.57%)
total loops in shared programs: 887 -> 711 (-19.84%)
GAINED:0
LOST:  36

I modified shader-db a bit to catch loop unrolls. The shaders that show an
increase in instruction count are all due to the loop unroll pass triggered by
this optimization on games that contain looped clamp/saturate operations. The
unroll pass also resulted in a few shaders with looped clamp/sat skipping
SIMD16 generation.

** No piglit regressions observed **

Abdiel Janulgue (17):
  i965/vec4/fs: Count loops in shader debug
  glsl: Add ir_unop_saturate
  glsl: Add constant evaluation of ir_unop_saturate
  glsl: Add a pass to lower ir_unop_saturate to clamp(x, 0, 1)
  ir_to_mesa, glsl_to_tgsi: lower ir_unop_saturate
  ir_to_mesa, glsl_to_tgsi: Add support for ir_unop_saturate
  i965/fs: Add support for ir_unop_saturate
  i965/vec4: Add support for ir_unop_saturate
  glsl: Implement saturate as ir_unop_saturate
  glsl: Optimize clamp(x, 0, 1) as saturate(x)
  glsl: Optimize clamp(x, 0.0, b), where b < 1.0 as min(saturate(x),b)
  glsl: Optimize clamp(x, b, 1.0), where b > 0.0 as max(saturate(x),b)
  i965/fs: Allow propagation of instructions with saturate flag to sel
  i965/vec4: Allow propagation of instructions with saturate flag to sel
  ir_to_mesa, glsl_to_tgsi: Remove try_emit_saturate
  i965/fs: Refactor try_emit_saturate
  i965/vec4: Remove try_emit_saturate

 src/glsl/ir.cpp  |  2 +
 src/glsl/ir.h|  1 +
 src/glsl/ir_builder.cpp  |  6 +-
 src/glsl/ir_constant_expression.cpp  |  6 ++
 src/glsl/ir_optimization.h   |  1 +
 src/glsl/ir_validate.cpp |  1 +
 src/glsl/lower_instructions.cpp  | 29 
 src/glsl/opt_algebraic.cpp   | 98 
++
 src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp |  1 +
 src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp| 18 -
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   |  6 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 27 ---
 src/mesa/drivers/dri/i965/brw_vec4.h |  2 +-
 src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp  | 85 
+++---
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |  6 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 25 ++-
 src/mesa/program/ir_to_mesa.cpp  | 59 +++-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp   | 63 +++--
 18 files changed, 261 insertions(+), 175 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/17] glsl: Add ir_unop_saturate

2014-08-18 Thread Abdiel Janulgue
Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/glsl/ir.cpp  | 2 ++
 src/glsl/ir.h| 1 +
 src/glsl/ir_validate.cpp | 1 +
 3 files changed, 4 insertions(+)

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 4a4d304..ef04ed0 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -255,6 +255,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_dFdy_fine:
case ir_unop_bitfield_reverse:
case ir_unop_interpolate_at_centroid:
+   case ir_unop_saturate:
   this-type = op0-type;
   break;
 
@@ -534,6 +535,7 @@ static const char *const operator_strs[] = {
bit_count,
find_msb,
find_lsb,
+   sat,
noise,
interpolate_at_centroid,
+,
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 18623b9..96c8b0e 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1248,6 +1248,7 @@ enum ir_expression_operation {
ir_unop_find_lsb,
/*@}*/
 
+   ir_unop_saturate,
ir_unop_noise,
 
/**
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 5b20677..97a581d 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -241,6 +241,7 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_unop_log:
case ir_unop_exp2:
case ir_unop_log2:
+   case ir_unop_saturate:
   assert(ir-operands[0]-type-base_type == GLSL_TYPE_FLOAT);
   assert(ir-type == ir-operands[0]-type);
   break;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/17] ir_to_mesa, glsl_to_tgsi: lower ir_unop_saturate

2014-08-18 Thread Abdiel Janulgue
Needed when vertex programs don't allow saturate

Reviewed-by: Matt Turner matts...@gmail.com
Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/mesa/program/ir_to_mesa.cpp| 5 -
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 011ffed..e8126b3 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2991,9 +2991,12 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
 
 /* Lowering */
 do_mat_op_to_vec(ir);
+GLenum target = 
_mesa_shader_stage_to_program(prog-_LinkedShaders[i]-Stage);
 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
-| ((options-EmitNoPow) ? POW_TO_EXP2 : 0)));
+| ((options-EmitNoPow) ? POW_TO_EXP2 : 0)
+| ((target == GL_VERTEX_PROGRAM_ARB) ? 
SAT_TO_CLAMP
+: 0)));
 
 progress = do_lower_jumps(ir, true, true, options-EmitNoMainReturn, 
options-EmitNoCont, options-EmitNoLoops) || progress;
 
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 84bdc4f..575da1e 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5429,6 +5429,9 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   if (!pscreen-get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
  lower_offset_arrays(ir);
   do_mat_op_to_vec(ir);
+  /* Emit saturates in the vertex shader only if SM 3.0 is supported. */
+  bool vs_sm3 = 
(_mesa_shader_stage_to_program(prog-_LinkedShaders[i]-Stage) ==
+ GL_VERTEX_PROGRAM_ARB)  
st_context(ctx)-has_shader_model3;
   lower_instructions(ir,
  MOD_TO_FRACT |
  DIV_TO_MUL_RCP |
@@ -5438,7 +5441,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
  CARRY_TO_ARITH |
  BORROW_TO_ARITH |
  (options-EmitNoPow ? POW_TO_EXP2 : 0) |
- (!ctx-Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 
0));
+ (!ctx-Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) 
|
+ (vs_sm3 ? SAT_TO_CLAMP : 0));
 
   lower_ubo_reference(prog-_LinkedShaders[i], ir);
   do_vec_index_to_cond_assign(ir);
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/17] i965/fs: Add support for ir_unop_saturate

2014-08-18 Thread Abdiel Janulgue
Reviewed-by: Matt Turner matts...@gmail.com
Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 1 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 
 2 files changed, 5 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index d98b7eb..cb0a079 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -246,6 +246,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment 
*ir)
case ir_unop_bit_count:
case ir_unop_find_msb:
case ir_unop_find_lsb:
+   case ir_unop_saturate:
   for (i = 0; i  vector_elements; i++) {
 ir_rvalue *op0 = get_element(op_var[0], i);
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 05082ee..c33c46b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -854,6 +854,10 @@ fs_visitor::visit(ir_expression *ir)
case ir_unop_find_lsb:
   emit(FBL(this-result, op[0]));
   break;
+   case ir_unop_saturate:
+  inst = emit(MOV(this-result, op[0]));
+  inst-saturate = true;
+  break;
case ir_triop_bitfield_extract:
   /* Note that the instruction's argument order is reversed from GLSL
* and the IR.
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/17] glsl: Add a pass to lower ir_unop_saturate to clamp(x, 0, 1)

2014-08-18 Thread Abdiel Janulgue
Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/glsl/ir_optimization.h  |  1 +
 src/glsl/lower_instructions.cpp | 29 +
 2 files changed, 30 insertions(+)

diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index b83c225..1c6f72b 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -40,6 +40,7 @@
 #define LDEXP_TO_ARITH 0x100
 #define CARRY_TO_ARITH 0x200
 #define BORROW_TO_ARITH0x400
+#define SAT_TO_CLAMP   0x800
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 176070c..6842853 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -41,6 +41,7 @@
  * - BITFIELD_INSERT_TO_BFM_BFI
  * - CARRY_TO_ARITH
  * - BORROW_TO_ARITH
+ * - SAT_TO_CLAMP
  *
  * SUB_TO_ADD_NEG:
  * ---
@@ -104,6 +105,10 @@
  * 
  * Converts ir_borrow into (x  y).
  *
+ * SAT_TO_CLAMP:
+ * -
+ * Converts ir_unop_saturate into min(max(x, 0.0), 1.0)
+ *
  */
 
 #include main/core.h /* for M_LOG2E */
@@ -139,6 +144,7 @@ private:
void ldexp_to_arith(ir_expression *);
void carry_to_arith(ir_expression *);
void borrow_to_arith(ir_expression *);
+   void sat_to_clamp(ir_expression *);
 };
 
 } /* anonymous namespace */
@@ -484,6 +490,24 @@ lower_instructions_visitor::borrow_to_arith(ir_expression 
*ir)
this-progress = true;
 }
 
+void
+lower_instructions_visitor::sat_to_clamp(ir_expression *ir)
+{
+   /* Translates
+*   ir_unop_saturate x
+* into
+*   ir_binop_min (ir_binop_max(x, 0.0), 1.0)
+*/
+
+   ir-operation = ir_binop_min;
+   ir-operands[0] = new(ir) ir_expression(ir_binop_max, ir-operands[0]-type,
+   ir-operands[0],
+   new(ir) ir_constant(0.0f));
+   ir-operands[1] = new(ir) ir_constant(1.0f);
+
+   this-progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -540,6 +564,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
  borrow_to_arith(ir);
   break;
 
+   case ir_unop_saturate:
+  if (lowering(SAT_TO_CLAMP))
+ sat_to_clamp(ir);
+  break;
+
default:
   return visit_continue;
}
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/17] i965/vec4: Allow propagation of instructions with saturate flag to sel

2014-08-18 Thread Abdiel Janulgue
When the sel condition is bounded within 0 and 1.0. This allows code such as:
mov.sat a b
sel.ge  dst a 0.25F

To be propagated as:
sel.ge.sat dst b 0.25F

v3: - Syntax clarifications in inst->saturate assignment
- Remove extra parenthesis when assigning src_reg value
  from copy_entry (Matt Turner)
v4: - Take channels into consideration when propagating saturated instructions.

Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 85 +++---
 1 file changed, 58 insertions(+), 27 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 37ca661..fe47b0f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -36,13 +36,17 @@ extern C {
 
 namespace brw {
 
+struct copy_entry {
+   src_reg *value[4];
+   int saturatemask;
+};
+
 static bool
 is_direct_copy(vec4_instruction *inst)
 {
return (inst-opcode == BRW_OPCODE_MOV 
   !inst-predicate 
   inst-dst.file == GRF 
-  !inst-saturate 
   !inst-dst.reladdr 
   !inst-src[0].reladdr 
   inst-dst.type == inst-src[0].type);
@@ -74,16 +78,16 @@ is_channel_updated(vec4_instruction *inst, src_reg 
*values[4], int ch)
 
 static bool
 try_constant_propagate(struct brw_context *brw, vec4_instruction *inst,
-   int arg, src_reg *values[4])
+   int arg, struct copy_entry *entry)
 {
/* For constant propagation, we only handle the same constant
 * across all 4 channels.  Some day, we should handle the 8-bit
 * float vector format, which would let us constant propagate
 * vectors better.
 */
-   src_reg value = *values[0];
+   src_reg value = *entry-value[0];
for (int i = 1; i  4; i++) {
-  if (!value.equals(*values[i]))
+  if (!value.equals(*entry-value[i]))
 return false;
}
 
@@ -213,22 +217,22 @@ is_logic_op(enum opcode opcode)
 
 static bool
 try_copy_propagate(struct brw_context *brw, vec4_instruction *inst,
-   int arg, src_reg *values[4])
+   int arg, struct copy_entry *entry, int reg)
 {
/* For constant propagation, we only handle the same constant
 * across all 4 channels.  Some day, we should handle the 8-bit
 * float vector format, which would let us constant propagate
 * vectors better.
 */
-   src_reg value = *values[0];
+   src_reg value = *entry-value[0];
for (int i = 1; i  4; i++) {
   /* This is equals() except we don't care about the swizzle. */
-  if (value.file != values[i]-file ||
- value.reg != values[i]-reg ||
- value.reg_offset != values[i]-reg_offset ||
- value.type != values[i]-type ||
- value.negate != values[i]-negate ||
- value.abs != values[i]-abs) {
+  if (value.file != entry-value[i]-file ||
+ value.reg != entry-value[i]-reg ||
+ value.reg_offset != entry-value[i]-reg_offset ||
+ value.type != entry-value[i]-type ||
+ value.negate != entry-value[i]-negate ||
+ value.abs != entry-value[i]-abs) {
 return false;
   }
}
@@ -239,7 +243,7 @@ try_copy_propagate(struct brw_context *brw, 
vec4_instruction *inst,
 */
int s[4];
for (int i = 0; i  4; i++) {
-  s[i] = BRW_GET_SWZ(values[i]-swizzle,
+  s[i] = BRW_GET_SWZ(entry-value[i]-swizzle,
 BRW_GET_SWZ(inst-src[arg].swizzle, i));
}
value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]);
@@ -300,6 +304,25 @@ try_copy_propagate(struct brw_context *brw, 
vec4_instruction *inst,
if (value.equals(inst-src[arg]))
   return false;
 
+   /* Limit saturate propagation only to SEL with src1 bounded within 0.0 and 
1.0
+* otherwise, skip copy propagate altogether
+*/
+   if (entry-saturatemask  (1  arg)) {
+  switch(inst-opcode) {
+  case BRW_OPCODE_SEL:
+ if (inst-src[1].file != IMM ||
+ inst-src[1].fixed_hw_reg.dw1.f  0.0 ||
+ inst-src[1].fixed_hw_reg.dw1.f  1.0) {
+return false;
+ }
+ if (!inst-saturate)
+inst-saturate = true;
+ break;
+  default:
+ return false;
+  }
+   }
+
value.type = inst-src[arg].type;
inst-src[arg] = value;
return true;
@@ -309,9 +332,9 @@ bool
 vec4_visitor::opt_copy_propagation()
 {
bool progress = false;
-   src_reg *cur_value[virtual_grf_reg_count][4];
+   struct copy_entry entries[virtual_grf_reg_count];
 
-   memset(cur_value, 0, sizeof(cur_value));
+   memset(entries, 0, sizeof(entries));
 
foreach_in_list(vec4_instruction, inst, instructions) {
   /* This pass only works on basic blocks.  If there's flow
@@ -322,7 +345,7 @@ vec4_visitor::opt_copy_propagation()
* src/glsl/opt_copy_propagation.cpp to track available 

[Mesa-dev] [PATCH 17/17] i965/vec4: Remove try_emit_saturate

2014-08-18 Thread Abdiel Janulgue
Now that saturate is implemented natively as an instruction,
we can cut down on unneeded functionality.

Reviewed-by: Matt Turner matts...@gmail.com
Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/mesa/drivers/dri/i965/brw_vec4.h   |  1 -
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 21 -
 2 files changed, 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index c333baa..e5ad7af 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -563,7 +563,6 @@ public:
src_reg orig_src,
int base_offset);
 
-   bool try_emit_sat(ir_expression *ir);
bool try_emit_mad(ir_expression *ir);
bool try_emit_b2f_of_compare(ir_expression *ir);
void resolve_ud_negate(src_reg *reg);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 2e7a85d..95d46c2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1078,24 +1078,6 @@ vec4_visitor::visit(ir_function *ir)
 }
 
 bool
-vec4_visitor::try_emit_sat(ir_expression *ir)
-{
-   ir_rvalue *sat_src = ir-as_rvalue_to_saturate();
-   if (!sat_src)
-  return false;
-
-   sat_src-accept(this);
-   src_reg src = this-result;
-
-   this-result = src_reg(this, ir-type);
-   vec4_instruction *inst;
-   inst = emit(MOV(dst_reg(this-result), src));
-   inst-saturate = true;
-
-   return true;
-}
-
-bool
 vec4_visitor::try_emit_mad(ir_expression *ir)
 {
/* 3-src instructions were introduced in gen6. */
@@ -1228,9 +1210,6 @@ vec4_visitor::visit(ir_expression *ir)
dst_reg result_dst;
vec4_instruction *inst;
 
-   if (try_emit_sat(ir))
-  return;
-
if (ir-operation == ir_binop_add) {
   if (try_emit_mad(ir))
 return;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/17] i965/fs: Allow propagation of instructions with saturate flag to sel

2014-08-18 Thread Abdiel Janulgue
When the sel condition is bounded within 0 and 1.0. This allows code such as:
mov.sat a b
sel.ge  dst a 0.25F

To be propagated as:
sel.ge.sat dst b 0.25F

v3: Syntax clarifications in inst->saturate assignment (Matt Turner)

Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 09f51bc..7e4eab7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -43,6 +43,7 @@ struct acp_entry : public exec_node {
fs_reg dst;
fs_reg src;
enum opcode opcode;
+   bool saturate;
 };
 
 struct block_data {
@@ -347,11 +348,26 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, 
acp_entry *entry)
   return false;
}
 
+   if (entry-saturate) {
+  switch(inst-opcode) {
+  case BRW_OPCODE_SEL:
+ if (inst-src[1].file != IMM ||
+ inst-src[1].fixed_hw_reg.dw1.f  0.0 ||
+ inst-src[1].fixed_hw_reg.dw1.f  1.0) {
+return false;
+ }
+ break;
+  default:
+ return false;
+  }
+   }
+
inst-src[arg].file = entry-src.file;
inst-src[arg].reg = entry-src.reg;
inst-src[arg].reg_offset = entry-src.reg_offset;
inst-src[arg].subreg_offset = entry-src.subreg_offset;
inst-src[arg].stride *= entry-src.stride;
+   inst-saturate = inst-saturate || entry-saturate;
 
if (!inst-src[arg].abs) {
   inst-src[arg].abs = entry-src.abs;
@@ -514,7 +530,6 @@ can_propagate_from(fs_inst *inst)
 inst-src[0].file == UNIFORM ||
 inst-src[0].file == IMM) 
inst-src[0].type == inst-dst.type 
-   !inst-saturate 
!inst-is_partial_write());
 }
 
@@ -569,6 +584,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, 
bblock_t *block,
 entry-dst = inst-dst;
 entry-src = inst-src[0];
  entry-opcode = inst-opcode;
+ entry-saturate = inst-saturate;
 acp[entry-dst.reg % ACP_HASH_SIZE].push_tail(entry);
   } else if (inst-opcode == SHADER_OPCODE_LOAD_PAYLOAD 
  inst-dst.file == GRF) {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/17] i965/vec4/fs: Count loops in shader debug

2014-08-18 Thread Abdiel Janulgue
Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 6 --
 src/mesa/drivers/dri/i965/brw_vec4.h | 1 +
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 6 --
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index a243003..c4e6c6d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1483,6 +1483,7 @@ void
 fs_generator::generate_code(exec_list *instructions)
 {
int start_offset = p-next_insn_offset;
+   int loop_count = 0;
 
struct annotation_info annotation;
memset(annotation, 0, sizeof(annotation));
@@ -1743,6 +1744,7 @@ fs_generator::generate_code(exec_list *instructions)
 
   case BRW_OPCODE_WHILE:
 brw_WHILE(p);
+ loop_count++;
 break;
 
   case SHADER_OPCODE_RCP:
@@ -1970,9 +1972,9 @@ fs_generator::generate_code(exec_list *instructions)
  fprintf(stderr, Native code for blorp program (SIMD%d dispatch):\n,
  dispatch_width);
   }
-  fprintf(stderr, SIMD%d shader: %d instructions. Compacted %d to %d
+  fprintf(stderr, SIMD%d shader: %d instructions. %d loops. Compacted %d 
to %d
bytes (%.0f%%)\n,
-  dispatch_width, before_size / 16, before_size, after_size,
+  dispatch_width, before_size / 16, loop_count, before_size, 
after_size,
   100.0f * (before_size - after_size) / before_size);
 
   const struct gl_program *prog = fp ? fp-Base : NULL;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index c59d24f..c333baa 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -699,6 +699,7 @@ private:
 
void *mem_ctx;
const bool debug_flag;
+   int loop_count;
 };
 
 } /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 1b1e647..b8948c3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1188,6 +1188,7 @@ 
vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
 
case BRW_OPCODE_WHILE:
   brw_WHILE(p);
+  loop_count++;
   break;
 
case SHADER_OPCODE_RCP:
@@ -1318,6 +1319,7 @@ vec4_generator::generate_code(exec_list *instructions)
 {
struct annotation_info annotation;
memset(annotation, 0, sizeof(annotation));
+   loop_count = 0;
 
cfg_t *cfg = NULL;
if (unlikely(debug_flag))
@@ -1372,9 +1374,9 @@ vec4_generator::generate_code(exec_list *instructions)
   } else {
  fprintf(stderr, Native code for vertex program %d:\n, prog-Id);
   }
-  fprintf(stderr, vec4 shader: %d instructions. Compacted %d to %d
+  fprintf(stderr, vec4 shader: %d instructions. %d loops. Compacted %d to 
%d
bytes (%.0f%%)\n,
-  before_size / 16, before_size, after_size,
+  before_size / 16, loop_count, before_size, after_size,
   100.0f * (before_size - after_size) / before_size);
 
   dump_assembly(p-store, annotation.ann_count, annotation.ann, brw, prog);
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/17] glsl: Implement saturate as ir_unop_saturate

2014-08-18 Thread Abdiel Janulgue
Now that we have the ir_unop_saturate implemented as a single
instruction, generate the correct simplified expression.

Reviewed-by: Matt Turner matts...@gmail.com
Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/glsl/ir_builder.cpp | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/glsl/ir_builder.cpp b/src/glsl/ir_builder.cpp
index f039414..a2f6f29 100644
--- a/src/glsl/ir_builder.cpp
+++ b/src/glsl/ir_builder.cpp
@@ -271,11 +271,7 @@ clamp(operand a, operand b, operand c)
 ir_expression *
 saturate(operand a)
 {
-   void *mem_ctx = ralloc_parent(a.val);
-
-   return expr(ir_binop_max,
-  expr(ir_binop_min, a, new(mem_ctx) ir_constant(1.0f)),
-  new(mem_ctx) ir_constant(0.0f));
+   return expr(ir_unop_saturate, a);
 }
 
 ir_expression *
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/17] glsl: Optimize clamp(x, 0.0, b), where b < 1.0 as min(saturate(x), b)

2014-08-18 Thread Abdiel Janulgue
v2: - Output min(saturate(x),b) instead of saturate(min(x,b)) suggested by Ilia 
Mirkin
- Make sure we do component-wise comparison for vectors (Ian Romanick)
v3: - Add missing condition where the outer constant value is zero and
  inner constant is < 1
- Fix comments to reflect we are doing a commutative operation (Matt Turner)

Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/glsl/opt_algebraic.cpp | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp
index 4b052933..6dfb681 100644
--- a/src/glsl/opt_algebraic.cpp
+++ b/src/glsl/opt_algebraic.cpp
@@ -110,6 +110,33 @@ is_vec_basis(ir_constant *ir)
return (ir == NULL) ? false : ir-is_basis();
 }
 
+static inline bool
+is_valid_vec_const(ir_constant *ir)
+{
+   if (ir == NULL)
+  return false;
+
+   if (!ir-type-is_scalar()  !ir-type-is_vector())
+  return false;
+
+   return true;
+}
+
+static inline bool
+is_less_than_one(ir_constant *ir)
+{
+   if (!is_valid_vec_const(ir))
+  return false;
+
+   unsigned component = 0;
+   for (int c = 0; c  ir-type-vector_elements; c++) {
+  if (ir-get_float_component(c)  1.0f)
+ component++;
+   }
+
+   return (component == ir-type-vector_elements);
+}
+
 static void
 update_type(ir_expression *ir)
 {
@@ -645,6 +672,18 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
 if ((outer_const-is_one()  inner_val_a-is_zero()) ||
 (inner_val_a-is_one()  outer_const-is_zero()))
return saturate(inner_val_b);
+
+/* Found a {min|max} ({max|min} (x, 0.0), b) where b < 1.0
+ * and its variations
+ */
+if (is_less_than_one(outer_const)  inner_val_b-is_zero())
+   return expr(ir_binop_min, saturate(inner_val_a), outer_const);
+
+if (!inner_val_b-as_constant())
+   continue;
+
+if (is_less_than_one(inner_val_b-as_constant())  
outer_const-is_zero())
+   return expr(ir_binop_min, saturate(inner_val_a), inner_val_b);
  }
   }
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/17] ir_to_mesa, glsl_to_tgsi: Add support for ir_unop_saturate

2014-08-18 Thread Abdiel Janulgue
Reviewed-by: Matt Turner matts...@gmail.com
Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/mesa/program/ir_to_mesa.cpp| 6 ++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 ++
 2 files changed, 12 insertions(+)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index e8126b3..f212aed 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1171,6 +1171,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
   emit(ir, OPCODE_DDY, result_dst, op[0]);
   break;
 
+   case ir_unop_saturate: {
+  ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV,
+  result_dst, op[0]);
+  inst-saturate = true;
+  break;
+   }
case ir_unop_noise: {
   const enum prog_opcode opcode =
 prog_opcode(OPCODE_NOISE1
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 575da1e..55b9940 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1460,6 +1460,12 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
case ir_unop_cos_reduced:
   emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
   break;
+   case ir_unop_saturate: {
+  glsl_to_tgsi_instruction *inst;
+  inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+  inst-saturate = true;
+  break;
+   }
 
case ir_unop_dFdx:
case ir_unop_dFdx_coarse:
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/17] glsl: Optimize clamp(x, 0, 1) as saturate(x)

2014-08-18 Thread Abdiel Janulgue
v2: - Check that the base type is float (Ian Romanick)
v3: - Make sure comments reflect that we are doing a commutative operation
- Add missing condition where the inner constant is 1.0 and outer constant 
is 0.0
- Make indexing of operands easier to read (Matt Turner)

Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/glsl/opt_algebraic.cpp | 36 
 1 file changed, 36 insertions(+)

diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp
index ac7514a..4b052933 100644
--- a/src/glsl/opt_algebraic.cpp
+++ b/src/glsl/opt_algebraic.cpp
@@ -614,6 +614,42 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
 
   break;
 
+   case ir_binop_min:
+   case ir_binop_max:
+  if (ir-type-base_type != GLSL_TYPE_FLOAT)
+ break;
+
+  /* Replace min(max) operations and its commutative combinations with
+   * a saturate operation
+   */
+  for (int op = 0; op  2; op++) {
+ ir_expression *minmax = op_expr[op];
+ ir_constant *outer_const = op_const[1 - op];
+ ir_expression_operation op_cond = (ir-operation == ir_binop_max) ?
+ir_binop_min : ir_binop_max;
+
+ if (!minmax || !outer_const || (minmax-operation != op_cond))
+continue;
+
+ /* Found a min(max) combination. Now try to see if its operands
+  * meet our conditions that we can do just a single saturate operation
+  */
+ for (int minmax_op = 0; minmax_op  2; minmax_op++) {
+ir_rvalue *inner_val_a = minmax-operands[minmax_op];
+ir_rvalue *inner_val_b = minmax-operands[1 - minmax_op];
+
+if (!inner_val_a || !inner_val_b)
+   continue;
+
+/* Found a {min|max} ({max|min} (x, 0.0), 1.0) operation and its 
variations */
+if ((outer_const-is_one()  inner_val_a-is_zero()) ||
+(inner_val_a-is_one()  outer_const-is_zero()))
+   return saturate(inner_val_b);
+ }
+  }
+
+  break;
+
case ir_unop_rcp:
   if (op_expr[0]  op_expr[0]-operation == ir_unop_rcp)
 return op_expr[0]-operands[0];
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/17] glsl: Add constant evaluation of ir_unop_saturate

2014-08-18 Thread Abdiel Janulgue
v2: Use CLAMP macro (Ian Romanick)

Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/glsl/ir_constant_expression.cpp | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/glsl/ir_constant_expression.cpp 
b/src/glsl/ir_constant_expression.cpp
index 9606021..1e8b3a3 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -1469,6 +1469,12 @@ ir_expression::constant_expression_value(struct 
hash_table *variable_context)
   }
   break;
 
+   case ir_unop_saturate:
+  for (unsigned c = 0; c  components; c++) {
+ data.f[c] = CLAMP(op[0]-value.f[c], 0.0f, 1.0f);
+  }
+  break;
+
case ir_triop_bitfield_extract: {
   int offset = op[1]-value.i[0];
   int bits = op[2]-value.i[0];
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/17] glsl: Optimize clamp(x, b, 1.0), where b > 0.0 as max(saturate(x), b)

2014-08-18 Thread Abdiel Janulgue
v2: - Output max(saturate(x),b) instead of saturate(max(x,b))
- Make sure we do component-wise comparison for vectors (Ian Romanick)
v3: - Add missing condition where the outer constant value is > 0.0 and
  inner constant is 1.0.
- Fix comments to show that the optimization is a commutative operation
  (Matt Turner)

Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/glsl/opt_algebraic.cpp | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp
index 6dfb681..447618f 100644
--- a/src/glsl/opt_algebraic.cpp
+++ b/src/glsl/opt_algebraic.cpp
@@ -137,6 +137,21 @@ is_less_than_one(ir_constant *ir)
return (component == ir-type-vector_elements);
 }
 
+static inline bool
+is_greater_than_zero(ir_constant *ir)
+{
+   if (!is_valid_vec_const(ir))
+  return false;
+
+   unsigned component = 0;
+   for (int c = 0; c  ir-type-vector_elements; c++) {
+  if (ir-get_float_component(c)  0.0f)
+ component++;
+   }
+
+   return (component == ir-type-vector_elements);
+}
+
 static void
 update_type(ir_expression *ir)
 {
@@ -684,6 +699,14 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
 
 if (is_less_than_one(inner_val_b-as_constant())  
outer_const-is_zero())
return expr(ir_binop_min, saturate(inner_val_a), inner_val_b);
+
+/* Found a {min|max} ({max|min} (x, b), 1.0), where b > 0.0
+ * and its variations
+ */
+if (outer_const-is_one()  
is_greater_than_zero(inner_val_b-as_constant()))
+   return expr(ir_binop_max, saturate(inner_val_a), inner_val_b);
+if (inner_val_b-as_constant()-is_one()  
is_greater_than_zero(outer_const))
+   return expr(ir_binop_max, saturate(inner_val_a), outer_const);
  }
   }
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/17] i965/fs: Refactor try_emit_saturate

2014-08-18 Thread Abdiel Janulgue
v3: Since the fs backend can emit saturate as a separate instruction, there is
no need to detect for min/max instructions and to rewrite the instruction 
tree
accordingly. On the other hand, we don't need to emit a separate saturated
mov either when the expression generating src can do saturate directly.

Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 23 ---
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index c33c46b..aeb076a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -267,17 +267,14 @@ fs_visitor::emit_minmax(enum brw_conditional_mod 
conditionalmod, const fs_reg d
}
 }
 
-/* Instruction selection: Produce a MOV.sat instead of
- * MIN(MAX(val, 0), 1) when possible.
- */
 bool
 fs_visitor::try_emit_saturate(ir_expression *ir)
 {
-   ir_rvalue *sat_val = ir-as_rvalue_to_saturate();
-
-   if (!sat_val)
+   if (ir-operation != ir_unop_saturate)
   return false;
 
+   ir_rvalue *sat_val = ir-operands[0];
+
fs_inst *pre_inst = (fs_inst *) this-instructions.get_tail();
 
sat_val-accept(this);
@@ -285,21 +282,17 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
 
fs_inst *last_inst = (fs_inst *) this-instructions.get_tail();
 
-   /* If the last instruction from our accept() didn't generate our
-* src, generate a saturated MOV
+   /* If the last instruction from our accept() generated our
+* src, just set the saturate flag instead of emitting a separate mov.
 */
fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
-   if (!modify || modify-regs_written != 1) {
-  this-result = fs_reg(this, ir-type);
-  fs_inst *inst = emit(MOV(this-result, src));
-  inst-saturate = true;
-   } else {
+   if (modify  modify-regs_written == 1) {
   modify-saturate = true;
   this-result = src;
+  return true;
}
 
-
-   return true;
+   return false;
 }
 
 bool
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/17] i965/vec4: Add support for ir_unop_saturate

2014-08-18 Thread Abdiel Janulgue
Reviewed-by: Matt Turner matts...@gmail.com
Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f22d38d..2e7a85d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1389,6 +1389,10 @@ vec4_visitor::visit(ir_expression *ir)
case ir_unop_find_lsb:
   emit(FBL(result_dst, op[0]));
   break;
+   case ir_unop_saturate:
+  inst = emit(MOV(result_dst, op[0]));
+  inst-saturate = true;
+  break;
 
case ir_unop_noise:
   unreachable(not reached: should be handled by lower_noise);
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/17] ir_to_mesa, glsl_to_tgsi: Remove try_emit_saturate

2014-08-18 Thread Abdiel Janulgue
Now that saturate is implemented natively as instruction,
we can cut down on unneeded functionality.

Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
---
 src/mesa/program/ir_to_mesa.cpp| 48 
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 51 --
 2 files changed, 99 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index f212aed..325946f 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -311,7 +311,6 @@ public:
  int mul_operand);
bool try_emit_mad_for_and_not(ir_expression *ir,
 int mul_operand);
-   bool try_emit_sat(ir_expression *ir);
 
void emit_swz(ir_expression *ir);
 
@@ -866,50 +865,6 @@ ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression 
*ir, int try_operand)
return true;
 }
 
-bool
-ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
-{
-   /* Saturates were only introduced to vertex programs in
-* NV_vertex_program3, so don't give them to drivers in the VP.
-*/
-   if (this-prog-Target == GL_VERTEX_PROGRAM_ARB)
-  return false;
-
-   ir_rvalue *sat_src = ir-as_rvalue_to_saturate();
-   if (!sat_src)
-  return false;
-
-   sat_src-accept(this);
-   src_reg src = this-result;
-
-   /* If we generated an expression instruction into a temporary in
-* processing the saturate's operand, apply the saturate to that
-* instruction.  Otherwise, generate a MOV to do the saturate.
-*
-* Note that we have to be careful to only do this optimization if
-* the instruction in question was what generated src-result.  For
-* example, ir_dereference_array might generate a MUL instruction
-* to create the reladdr, and return us a src reg using that
-* reladdr.  That MUL result is not the value we're trying to
-* saturate.
-*/
-   ir_expression *sat_src_expr = sat_src-as_expression();
-   ir_to_mesa_instruction *new_inst;
-   new_inst = (ir_to_mesa_instruction *)this-instructions.get_tail();
-   if (sat_src_expr  (sat_src_expr-operation == ir_binop_mul ||
-   sat_src_expr-operation == ir_binop_add ||
-   sat_src_expr-operation == ir_binop_dot)) {
-  new_inst-saturate = true;
-   } else {
-  this-result = get_temp(ir-type);
-  ir_to_mesa_instruction *inst;
-  inst = emit(ir, OPCODE_MOV, dst_reg(this-result), src);
-  inst-saturate = true;
-   }
-
-   return true;
-}
-
 void
 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
src_reg *reg, int *num_reladdr)
@@ -1072,9 +1027,6 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 return;
}
 
-   if (try_emit_sat(ir))
-  return;
-
if (ir-operation == ir_quadop_vector) {
   this-emit_swz(ir);
   return;
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 55b9940..2946286 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -446,7 +446,6 @@ public:
   int mul_operand);
bool try_emit_mad_for_and_not(ir_expression *ir,
   int mul_operand);
-   bool try_emit_sat(ir_expression *ir);
 
void emit_swz(ir_expression *ir);
 
@@ -1270,53 +1269,6 @@ 
glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan
return true;
 }
 
-bool
-glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
-{
-   /* Emit saturates in the vertex shader only if SM 3.0 is supported.
-*/
-   if (this-prog-Target == GL_VERTEX_PROGRAM_ARB 
-   !st_context(this-ctx)-has_shader_model3) {
-  return false;
-   }
-
-   ir_rvalue *sat_src = ir-as_rvalue_to_saturate();
-   if (!sat_src)
-  return false;
-
-   sat_src-accept(this);
-   st_src_reg src = this-result;
-
-   /* If we generated an expression instruction into a temporary in
-* processing the saturate's operand, apply the saturate to that
-* instruction.  Otherwise, generate a MOV to do the saturate.
-*
-* Note that we have to be careful to only do this optimization if
-* the instruction in question was what generated src-result.  For
-* example, ir_dereference_array might generate a MUL instruction
-* to create the reladdr, and return us a src reg using that
-* reladdr.  That MUL result is not the value we're trying to
-* saturate.
-*/
-   ir_expression *sat_src_expr = sat_src-as_expression();
-   if (sat_src_expr  (sat_src_expr-operation == ir_binop_mul ||
-   sat_src_expr-operation == ir_binop_add ||
-   sat_src_expr-operation == ir_binop_dot)) {
-  glsl_to_tgsi_instruction *new_inst;
-  new_inst = (glsl_to_tgsi_instruction *)this-instructions.get_tail();
-  new_inst-saturate = true;
-   } else {
-  this-result = get_temp(ir-type);
-  st_dst_reg result_dst = 

Re: [Mesa-dev] [PATCH 10/19] auxiliary/os: introduce os_get_total_physical_memory helper function

2014-08-18 Thread Jon TURNEY

On 14/08/2014 23:18, Emil Velikov wrote:

Cc: Alexander von Gluck IV kallis...@unixzen.com
Signed-off-by: Emil Velikov emil.l.veli...@gmail.com
---
  src/gallium/auxiliary/os/os_misc.c | 64 ++
  src/gallium/auxiliary/os/os_misc.h |  7 +
  2 files changed, 71 insertions(+)


Since this #errors on unknown platforms, teach it about the existence of 
Cygwin.


From 03e0df4455810e255c22a0532b9e66dcc3d60a1d Mon Sep 17 00:00:00 2001
From: Jon TURNEY jon.tur...@dronecode.org.uk
Date: Sun, 17 Aug 2014 17:21:27 +0100
Subject: [PATCH] Teach os_get_physical_memory about Cygwin

Signed-off-by: Jon TURNEY jon.tur...@dronecode.org.uk
---
 src/gallium/auxiliary/os/os_misc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/os/os_misc.c 
b/src/gallium/auxiliary/os/os_misc.c
index 3846a9a..ef84c79 100644
--- a/src/gallium/auxiliary/os/os_misc.c
+++ b/src/gallium/auxiliary/os/os_misc.c
@@ -47,7 +47,7 @@
 #endif
 
 
-#if defined(PIPE_OS_LINUX)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_CYGWIN)
 #  include unistd.h
 #elif defined(PIPE_OS_APPLE) || defined(PIPE_OS_BSD)
 #  include sys/sysctl.h
@@ -111,7 +111,7 @@ os_get_option(const char *name)
 bool
 os_get_total_physical_memory(uint64_t *size)
 {
-#if defined(PIPE_OS_LINUX)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_CYGWIN)
const long phys_pages = sysconf(_SC_PHYS_PAGES);
const long page_size = sysconf(_SC_PAGE_SIZE);
 
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/19] auxiliary/os: introduce os_get_total_physical_memory helper function

2014-08-18 Thread Emil Velikov
On 18/08/14 13:20, Jon TURNEY wrote:
 On 14/08/2014 23:18, Emil Velikov wrote:
 Cc: Alexander von Gluck IV kallis...@unixzen.com
 Signed-off-by: Emil Velikov emil.l.veli...@gmail.com
 ---
   src/gallium/auxiliary/os/os_misc.c | 64
 ++
   src/gallium/auxiliary/os/os_misc.h |  7 +
   2 files changed, 71 insertions(+)
 
 Since this #errors on unknown platforms, teach it about the existence of 
 Cygwin.
 
 
 0001-Teach-os_get_physical_memory-about-Cygwin.patch
 
 
 From 03e0df4455810e255c22a0532b9e66dcc3d60a1d Mon Sep 17 00:00:00 2001
 From: Jon TURNEY jon.tur...@dronecode.org.uk
 Date: Sun, 17 Aug 2014 17:21:27 +0100
 Subject: [PATCH] Teach os_get_physical_memory about Cygwin
 
 Signed-off-by: Jon TURNEY jon.tur...@dronecode.org.uk
I was under the strange impression that p_config.h would set PIPE_OS_LINUX
for Cygwin. It seems I got confused with PIPE_OS_UNIX.

Reviewed-by: Emil Velikov emil.l.veli...@gmail.com

 ---
  src/gallium/auxiliary/os/os_misc.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)
 
 diff --git a/src/gallium/auxiliary/os/os_misc.c 
 b/src/gallium/auxiliary/os/os_misc.c
 index 3846a9a..ef84c79 100644
 --- a/src/gallium/auxiliary/os/os_misc.c
 +++ b/src/gallium/auxiliary/os/os_misc.c
 @@ -47,7 +47,7 @@
  #endif
  
  
 -#if defined(PIPE_OS_LINUX)
 +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_CYGWIN)
  #  include unistd.h
  #elif defined(PIPE_OS_APPLE) || defined(PIPE_OS_BSD)
  #  include sys/sysctl.h
 @@ -111,7 +111,7 @@ os_get_option(const char *name)
  bool
  os_get_total_physical_memory(uint64_t *size)
  {
 -#if defined(PIPE_OS_LINUX)
 +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_CYGWIN)
 const long phys_pages = sysconf(_SC_PHYS_PAGES);
 const long page_size = sysconf(_SC_PAGE_SIZE);
  
 -- 1.8.5.5
 
 
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] clover: unsure compat::string is \0 terminated

2014-08-18 Thread Francisco Jerez
EdB edb+m...@sigluy.net writes:

 otherwise c_str() is not safe
 ---
  src/gallium/state_trackers/clover/util/compat.hpp | 54 
 ---
  1 file changed, 48 insertions(+), 6 deletions(-)

 diff --git a/src/gallium/state_trackers/clover/util/compat.hpp 
 b/src/gallium/state_trackers/clover/util/compat.hpp
 index 6f0f7cc..7ca1f85 100644
 --- a/src/gallium/state_trackers/clover/util/compat.hpp
 +++ b/src/gallium/state_trackers/clover/util/compat.hpp
 @@ -197,7 +197,7 @@ namespace clover {
  return _p[i];
   }
  
 -  private:
 +  protected:
   iterator _p;  //memory array
   size_type _s; //size
   size_type _c; //capacity
 @@ -306,18 +306,56 @@ namespace clover {
  
class string : public vectorchar {
public:
 - string() : vector() {
 + string() : vector(0, 1) {
 +_p[_s - 1] = '\0';
   }
  
 - string(const char *p) : vector(p, std::strlen(p)) {
 + string(const char *p) : vector(p, std::strlen(p) + 1) {
 +_p[_s - 1] = '\0';
   }
  
   templatetypename C
 - string(const C v) : vector(v) {
 + string(const C v) : vector(*v.begin(), v.size() + 1) {
 +_p[_s - 1] = '\0';
   }
  
 - operator std::string() const {
 -return std::string(begin(), end());
 + void
 + reserve(size_type m) {
 +vector::reserve(m + 1);
 + }
 +
 + void
 + resize(size_type m, char x = '\0') {
 +vector::resize(m + 1, x);
 +_p[_s - 1] = '\0';
 + }
 +
 + void
 + push_back(char x) {
 +reserve(_s + 1);
 +_p[_s - 1] = x;
 +_p[_s] = '\0';
 +++_s;
 + }
 +
 + size_type
 + size() const {
 +return _s - 1;
 + }
 +
 + size_type
 + capacity() const {
 +return _c - 1;
 + }
 +
 + iterator
 + end() {
 +return _p + size();
 + }
 +
 + const_iterator
 + end() const {
 +return _p + size();
   }
  

At this point, where all methods from the base class need to be redefined,
it probably stops making sense to use inheritance instead of
aggregation.  Once we've done that, fixing c_str() gets a lot easier (two
lines of code) because we can just declare the container as mutable and
fix up the NUL terminator when c_str() is called.  Both changes are
attached.

   const char *
 @@ -325,6 +363,10 @@ namespace clover {
  return begin();
   }
  
 + operator std::string() const {
 +return std::string(begin(), end());
 + }
 +
   const char *
   find(const string s) const {
  for (size_t i = 0; i + s.size()  size(); ++i) {
 -- 
 2.0.4

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

From e1e97e017f25f4ed1c75bae71095ffa116374654 Mon Sep 17 00:00:00 2001
From: Francisco Jerez curroje...@riseup.net
Date: Mon, 18 Aug 2014 15:21:52 +0300
Subject: [PATCH 1/2] clover/util: Implement compat::string using aggregation
 instead of inheritance.

---
 src/gallium/state_trackers/clover/util/compat.hpp | 76 +--
 1 file changed, 71 insertions(+), 5 deletions(-)

diff --git a/src/gallium/state_trackers/clover/util/compat.hpp b/src/gallium/state_trackers/clover/util/compat.hpp
index a4e3938..e0ab965 100644
--- a/src/gallium/state_trackers/clover/util/compat.hpp
+++ b/src/gallium/state_trackers/clover/util/compat.hpp
@@ -280,20 +280,83 @@ namespace clover {
  size_t offset;
   };
 
-  class string : public vectorchar {
+  class string {
   public:
- string() : vector() {
+ typedef char *iterator;
+ typedef const char *const_iterator;
+ typedef char value_type;
+ typedef char reference;
+ typedef const char const_reference;
+ typedef std::ptrdiff_t difference_type;
+ typedef std::size_t size_type;
+
+ string() : v() {
  }
 
- string(const char *p) : vector(p, std::strlen(p)) {
+ string(const char *p) : v(p, std::strlen(p)) {
  }
 
  templatetypename C
- string(const C v) : vector(v) {
+ string(const C v) : v(v) {
  }
 
  operator std::string() const {
-return std::string(begin(), end());
+return std::string(v.begin(), v.end());
+ }
+
+ void
+ reserve(size_type n) {
+v.reserve(n);
+ }
+
+ void
+ resize(size_type n, char x = char()) {
+v.resize(n, x);
+ }
+
+ void
+ push_back(char x) {
+v.push_back(x);
+ }
+
+ size_type
+ size() const {
+return v.size();
+ 

Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa

2014-08-18 Thread Roland Scheidegger
Am 16.08.2014 02:12, schrieb Connor Abbott:
 I know what you might be thinking right now. Wait, *another* IR? Don't
 we already have like 5 of those, not counting all the driver-specific
 ones? Isn't this stuff complicated enough already? Well, there are some
 pretty good reasons to start afresh (again...). In the years we've been
 using GLSL IR, we've come to realize that, in fact, it's not what we
 want *at all* to do optimizations on. Ian has done a talk at FOSDEM that
 highlights some of the problems they've run into:
 
 https://urldefense.proofpoint.com/v1/url?u=https://video.fosdem.org/2014/H1301_Cornil/Saturday/Three_Years_Experience_with_a_Treelike_Shader_IR.webmk=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0Ar=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0Am=iXhCeAYmidPDc1lFo757Cc9V0PvWAN4n3X%2Fw%2B%2F7Lx%2Fs%3D%0As=f103fb26bf53eee64318a490517d1ee9ab88ecd29fcdbe49d54b5a27e7581c2e
 
 But here's the summary:
 
 * GLSL IR is way too much of a memory hog, since it has to make a new
 variable for each temporary the compiler creates and then each time you
 want to dereference that temporary you need to create an
 ir_dereference_variable that points to it which is also very
 cache-unfriendly (downright cache-mean!).
 
 * The expression trees were originally added so that we could do
 pattern matching to automatically optimize things, but this turned out
 to be both very difficult to do and not very helpful. Instead, all it
 does is add more complexity to the IR without much benefit - with SSA or
 having proper use-def chains, we could get back what the trees give us
 while also being able to do lots more optimizations.
 
 * We don't have the concept of basic blocks in GLSL IR, which makes a
 lot of optimizations harder because they were originally designed with
 basic blocks in mind - take, for example, my SSA series. I had to map a
 whole lot of concepts that were based on the control flow graph to this
 tree of statements that GLSL IR uses, and the end result wound up
 looking nothing at all like the original paper. This problem gets even
 worse for things like e.g. Global Code Motion that depend upon having
 the dominance tree.
 
 I originally wanted to modify GLSL IR to fix these problems by adding
 new instruction types that would address these issues and then
 converting back and forth between the old and the new form, but I
 realized that fixing all the problems would basically mean a complete
 rewrite - and if that's the case, then why don't we start from scratch?
 So I took Ken's suggestions and started designing, and then at Intel
 over the summer started implementing, a completely new IR which I call
 NIR that's at a lower level than GLSL IR, but still high-level enough to
 be mostly device-independent (different drivers may have different
 passes and different ways of lowering e.g.  matrix multiplies) so that
 we can do generic optimizations on it. Having support for SSA from the
 beginning was also a must, because lots of optimisations that we really
 want for cleaning up DX9-translated games are either a lot easier in or
 made possible by SSA. I also made the decision for it to be typeless,
 because that's what the cool kids are all doing :) and for a
 lower-level, flat IR it seemed like the thing to do (it could have gone
 either way, though). So the key design points of NIR (pronounced either
 like "near" as in "NIR is near!" or to rhyme with "burr") are:
 
 * It's flat (no expression trees)
 
 * It's typeless
 
 * Modifiers (abs, negate, saturate), swizzles, and write masks are part
 of ALU instructions
 
 * It includes enough GLSL-like things (variables that you can load from
 or store to, function calls) to be hardware-agnostic (although we don't
 have a way to represent matrix multiplies right now, but that could
 easily be added) to be able to do optimizations at a high level, while
 having lowering passes that convert variables to registers and
 input/output/uniform loads/stores that will open up more opportunities
 for optimization and save memory while being more hardware-specific.
 
 * Control flow consists of a tree of if statements and loops, like in
 GLSL IR, except the leaves of the tree are now basic blocks instead of
 instructions. Also, each basic block keeps track of its successors and
 predecessors, so the control flow graph is explicit in the IR.
 
 * SSA is natively supported, and SSA uses point directly to the SSA
 definition, which means that the use-def chains are always there, and
 def-use chains are kept by tracking the set of all uses for each
 definition.
 
 * It's written in C.
 
 (see the README in patch 3 and nir.h in patch 4 for more details)
 
 Some things that are missing or could be improved:
 
 * There's currently no alias tracking for inputs, outputs, and uniforms.
 This is especially important for uniforms because we don't pack them
 like we pack inputs and outputs.
 
 * We need a way to represent matrix multiplies so that we can do
 matrix-flipping optimizations 

Re: [Mesa-dev] [PATCH 1/9] glsl: Optimize min/max expression trees

2014-08-18 Thread Petri Latvala

On 08/14/2014 04:33 AM, Ian Romanick wrote:

On 07/29/2014 02:36 AM, Petri Latvala wrote:

Add an optimization pass that drops min/max expression operands that
can be proven to not contribute to the final result. The algorithm is
similar to alpha-beta pruning on a minmax search, from the field of
AI.

This optimization pass can optimize min/max expressions where operands
are min/max expressions. Such code can appear in shaders by itself, or
as the result of clamp() or AMD_shader_trinary_minmax functions.

This optimization pass improves the generated code for piglit's
AMD_shader_trinary_minmax tests as follows:

total instructions in shared programs: 75 -> 67 (-10.67%)
instructions in affected programs: 60 -> 52 (-13.33%)
GAINED:0
LOST:  0

All tests (max3, min3, mid3) improved.

And I assume no piglit regressions?


Indeed no regressions, or new successes. I wrote that in the cover 
letter, I should have written it also in this patch's commit message...




Also... have you tried this in combination with Abdiel's related work on
saturates?


Tested the combination now, after some fighting with shader-db. The
results are the same, except:

One shader from
Dungeon Defenders is hurt by shader-db metrics (26 -> 28), because of
dropping of a (constant float (0.0)) operand, which was
compiled to a saturate modifier.


This shader compiled into the same code with or without my patches.

Talked with Abdiel about the combination, recapping here: Our changes 
are orthogonal and not conflicting, so we can both proceed at our own paces.




Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76861
Signed-off-by: Petri Latvala petri.latv...@intel.com
---
  src/glsl/Makefile.sources   |   1 +
  src/glsl/glsl_parser_extras.cpp |   1 +
  src/glsl/ir_optimization.h  |   1 +
  src/glsl/opt_minmax.cpp | 395 
  4 files changed, 398 insertions(+)
  create mode 100644 src/glsl/opt_minmax.cpp

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index b54eae7..1ee80a3 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -95,6 +95,7 @@ LIBGLSL_FILES = \
$(GLSL_SRCDIR)/opt_flip_matrices.cpp \
$(GLSL_SRCDIR)/opt_function_inlining.cpp \
$(GLSL_SRCDIR)/opt_if_simplification.cpp \
+   $(GLSL_SRCDIR)/opt_minmax.cpp \
$(GLSL_SRCDIR)/opt_noop_swizzle.cpp \
$(GLSL_SRCDIR)/opt_rebalance_tree.cpp \
$(GLSL_SRCDIR)/opt_redundant_jumps.cpp \
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 890123a..9f57ef3 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -1561,6 +1561,7 @@ do_common_optimization(exec_list *ir, bool linked,
 else
progress = do_constant_variable_unlinked(ir) || progress;
 progress = do_constant_folding(ir) || progress;
+   progress = do_minmax_prune(ir) || progress;
 progress = do_cse(ir) || progress;
 progress = do_rebalance_tree(ir) || progress;
 progress = do_algebraic(ir, native_integers, options) || progress;
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index b83c225..9d22585 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -98,6 +98,7 @@ bool opt_flatten_nested_if_blocks(exec_list *instructions);
  bool do_discard_simplification(exec_list *instructions);
  bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0);
  bool do_mat_op_to_vec(exec_list *instructions);
+bool do_minmax_prune(exec_list *instructions);
  bool do_noop_swizzle(exec_list *instructions);
  bool do_structure_splitting(exec_list *instructions);
  bool do_swizzle_swizzle(exec_list *instructions);
diff --git a/src/glsl/opt_minmax.cpp b/src/glsl/opt_minmax.cpp
new file mode 100644
index 000..5656059
--- /dev/null
+++ b/src/glsl/opt_minmax.cpp
@@ -0,0 +1,395 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF 

Re: [Mesa-dev] [PATCH] squash! glsl: Optimize min/max expression trees

2014-08-18 Thread Petri Latvala

On 08/14/2014 07:04 AM, Matt Turner wrote:

---
I'd squash this in at minimum. The changes are

  - Whitespace
  - Removal of unnecessary destructor
  - Renaming one and two to a and b (one->value.u[c0] <
two->value.u[c0]...)
  - continue -> break
  - assert(!"...") -> unreachable
  - Not doing assignments in if conditionals
  - Marking swizzle_if_required as static


Thanks, I'll squash this in.


I also think less_all_components should just return an enum like
{ MIXED, EQUAL, LESS, GREATER }, rather than setting a variable in
the class. It, as well as smaller/larger_constant, can then be
static functions outside of the visitor.

Yes, I'll try what it looks like with that.


I think the algorithm itself looks correct.

  src/glsl/opt_minmax.cpp | 145 +---
  1 file changed, 63 insertions(+), 82 deletions(-)

diff --git a/src/glsl/opt_minmax.cpp b/src/glsl/opt_minmax.cpp
index 5656059..b987386 100644
--- a/src/glsl/opt_minmax.cpp
+++ b/src/glsl/opt_minmax.cpp
@@ -37,12 +37,10 @@
  #include glsl_types.h
  #include main/macros.h
  
-namespace

-{
-class minmax_range
-{
-public:
+namespace {
  
+class minmax_range {

+public:
 minmax_range(ir_constant *low = NULL, ir_constant *high = NULL)
 {
range[0] = low;
@@ -60,60 +58,45 @@ public:
  class ir_minmax_visitor : public ir_rvalue_enter_visitor {
  public:
 ir_minmax_visitor()
-  : progress(false)
-  , valid(true)
-   {
-   }
-
-   virtual ~ir_minmax_visitor()
+  : progress(false), valid(true)
 {
 }
  
-   bool

-   less_all_components(ir_constant *one, ir_constant *two);
-
-   ir_constant *
-   smaller_constant(ir_constant *one, ir_constant *two);
-
-   ir_constant *
-   larger_constant(ir_constant *one, ir_constant *two);
+   bool less_all_components(ir_constant *a, ir_constant *b);
+   ir_constant *smaller_constant(ir_constant *a, ir_constant *b);
+   ir_constant *larger_constant(ir_constant *a, ir_constant *b);
  
-   minmax_range

-   combine_range(minmax_range r0, minmax_range r1, bool ismin);
+   minmax_range combine_range(minmax_range r0, minmax_range r1, bool ismin);
  
-   minmax_range

-   range_intersection(minmax_range r0, minmax_range r1);
+   minmax_range range_intersection(minmax_range r0, minmax_range r1);
  
-   minmax_range

-   get_range(ir_rvalue *rval);
+   minmax_range get_range(ir_rvalue *rval);
  
-   ir_rvalue *

-   prune_expression(ir_expression *expr, minmax_range baserange);
+   ir_rvalue *prune_expression(ir_expression *expr, minmax_range baserange);
  
-   void

-   handle_rvalue(ir_rvalue **rvalue);
+   void handle_rvalue(ir_rvalue **rvalue);
  
 bool progress;

 bool valid;
  };
  
  /*

- * Returns true if all vector components of `one' are less than of `two'.
+ * Returns true if all vector components of `a' are less than of `b'.
   *
   * If there are vector components that are less while others are greater, the
   * visitor is marked invalid and no further changes will be made to the IR.
   */
  bool
-ir_minmax_visitor::less_all_components(ir_constant *one, ir_constant *two)
+ir_minmax_visitor::less_all_components(ir_constant *a, ir_constant *b)
  {
-   assert(one != NULL);
-   assert(two != NULL);
+   assert(a != NULL);
+   assert(b != NULL);
  
-   assert(one-type-base_type == two-type-base_type);

+   assert(a-type-base_type == b-type-base_type);
  
-   unsigned oneinc = one-type-is_scalar() ? 0 : 1;

-   unsigned twoinc = two-type-is_scalar() ? 0 : 1;
-   unsigned components = MAX2(one-type-components(), 
two-type-components());
+   unsigned a_inc = a-type-is_scalar() ? 0 : 1;
+   unsigned b_inc = b-type-is_scalar() ? 0 : 1;
+   unsigned components = MAX2(a-type-components(), b-type-components());
  
 /* No early escape. We need to go through all components and mark the

  * visitor as invalid if comparison yields less for some components and
@@ -127,34 +110,34 @@ ir_minmax_visitor::less_all_components(ir_constant *one, 
ir_constant *two)
  
 for (unsigned i = 0, c0 = 0, c1 = 0;

  i  components;
-c0 += oneinc, c1 += twoinc, ++i) {
-  switch (one-type-base_type) {
+c0 += a_inc, c1 += b_inc, ++i) {
+  switch (a-type-base_type) {
case GLSL_TYPE_UINT:
- if (one-value.u[c0]  two-value.u[c1])
+ if (a-value.u[c0]  b-value.u[c1])
  foundless = true;
- else if (one-value.u[c0]  two-value.u[c1])
+ else if (a-value.u[c0]  b-value.u[c1])
  foundgreater = true;
   else
  foundequal = true;
- continue;
+ break;
case GLSL_TYPE_INT:
- if (one-value.i[c0]  two-value.i[c1])
+ if (a-value.i[c0]  b-value.i[c1])
  foundless = true;
- else if (one-value.i[c0]  two-value.i[c1])
+ else if (a-value.i[c0]  b-value.i[c1])
  foundgreater = true;
   else
  foundequal = true;
- continue;
+ break;
case 

[Mesa-dev] [Bug 82538] Super Maryo Chronicles fails with st/mesa assertion failure

2014-08-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=82538

--- Comment #4 from smoki smoki00...@gmail.com ---
(In reply to comment #2)
 (In reply to comment #1)
 I can still reproduce it with current Mesa Git. Does your Mesa build have
 assertions enabled?
 

 Ah sorry did not have it that time, so yeah bug is there.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa

2014-08-18 Thread Marek Olšák
On Mon, Aug 18, 2014 at 2:44 PM, Roland Scheidegger srol...@vmware.com wrote:
 Am 16.08.2014 02:12, schrieb Connor Abbott:
 I know what you might be thinking right now. Wait, *another* IR? Don't
 we already have like 5 of those, not counting all the driver-specific
 ones? Isn't this stuff complicated enough already? Well, there are some
 pretty good reasons to start afresh (again...). In the years we've been
 using GLSL IR, we've come to realize that, in fact, it's not what we
 want *at all* to do optimizations on. Ian has done a talk at FOSDEM that
 highlights some of the problems they've run into:

 https://urldefense.proofpoint.com/v1/url?u=https://video.fosdem.org/2014/H1301_Cornil/Saturday/Three_Years_Experience_with_a_Treelike_Shader_IR.webmk=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0Ar=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0Am=iXhCeAYmidPDc1lFo757Cc9V0PvWAN4n3X%2Fw%2B%2F7Lx%2Fs%3D%0As=f103fb26bf53eee64318a490517d1ee9ab88ecd29fcdbe49d54b5a27e7581c2e

 But here's the summary:

 * GLSL IR is way too much of a memory hog, since it has to make a new
 variable for each temporary the compiler creates and then each time you
 want to dereference that temporary you need to create an
 ir_dereference_variable that points to it which is also very
 cache-unfriendly (downright cache-mean!).

 * The expression trees were originally added so that we could do
 pattern matching to automatically optimize things, but this turned out
 to be both very difficult to do and not very helpful. Instead, all it
 does is add more complexity to the IR without much benefit - with SSA or
 having proper use-def chains, we could get back what the trees give us
 while also being able to do lots more optimizations.

 * We don't have the concept of basic blocks in GLSL IR, which makes a
 lot of optimizations harder because they were originally designed with
 basic blocks in mind - take, for example, my SSA series. I had to map a
 whole lot of concepts that were based on the control flow graph to this
 tree of statements that GLSL IR uses, and the end result wound up
 looking nothing at all like the original paper. This problem gets even
 worse for things like e.g. Global Code Motion that depend upon having
 the dominance tree.

 I originally wanted to modify GLSL IR to fix these problems by adding
 new instruction types that would address these issues and then
 converting back and forth between the old and the new form, but I
 realized that fixing all the problems would basically mean a complete
 rewrite - and if that's the case, then why don't we start from scratch?
 So I took Ken's suggestions and started designing, and then at Intel
 over the summer started implementing, a completely new IR which I call
 NIR that's at a lower level than GLSL IR, but still high-level enough to
 be mostly device-independent (different drivers may have different
 passes and different ways of lowering e.g.  matrix multiplies) so that
 we can do generic optimizations on it. Having support for SSA from the
 beginning was also a must, because lots of optimisations that we really
 want for cleaning up DX9-translated games are either a lot easier in or
 made possible by SSA. I also made the decision for it to be typeless,
 because that's what the cool kids are all doing :) and for a
 lower-level, flat IR it seemed like the thing to do (it could have gone
 either way, though). So the key design points of NIR (pronounced either
 like "near" as in "NIR is near!" or to rhyme with "burr") are:

 * It's flat (no expression trees)

 * It's typeless

 * Modifiers (abs, negate, saturate), swizzles, and write masks are part
 of ALU instructions

 * It includes enough GLSL-like things (variables that you can load from
 or store to, function calls) to be hardware-agnostic (although we don't
 have a way to represent matrix multiplies right now, but that could
 easily be added) to be able to do optimizations at a high level, while
 having lowering passes that convert variables to registers and
 input/output/uniform loads/stores that will open up more opportunities
 for optimization and save memory while being more hardware-specific.

 * Control flow consists of a tree of if statements and loops, like in
 GLSL IR, except the leaves of the tree are now basic blocks instead of
 instructions. Also, each basic block keeps track of its successors and
 predecessors, so the control flow graph is explicit in the IR.

 * SSA is natively supported, and SSA uses point directly to the SSA
 definition, which means that the use-def chains are always there, and
 def-use chains are kept by tracking the set of all uses for each
 definition.

 * It's written in C.

 (see the README in patch 3 and nir.h in patch 4 for more details)

 Some things that are missing or could be improved:

 * There's currently no alias tracking for inputs, outputs, and uniforms.
 This is especially important for uniforms because we don't pack them
 like we pack inputs and outputs.

 * We need a way to represent 

Re: [Mesa-dev] [PATCH] squash! glsl: Optimize min/max expression trees

2014-08-18 Thread Petri Latvala

On 08/14/2014 11:00 AM, Connor Abbott wrote:



Another thing I'd like to see is to change minmax_range to call things
low and high instead of range[0] and range[1]. This helps
readability, and the tricks with indirect addressing that having an
array lets you do are things we really shouldn't be doing anyways
because it's hard to follow.


Sure, changing.



As I mentioned before, swizzle_if_required() should probably use the
ir_builder swizzle helpers.


I copied swizzle_if_required from opt_algebraic. I'll squeeze in a patch 
that changes that as well. Or actually just refactor the function to 
live somewhere where it's reusable.





I'm still not convinced that the algorithm is the best way to go about
it. Right now, AFAICT, we do something like:

- Pass in a base range, which is what the min's and max's above us
in the tree will clamp the value we return to
- Get the ranges for each subexpression (this is a recursive call)
- Check and see if each operand is unnecessary (i.e. its range is
strictly greater than the base range or strictly greater than the
other argument for mins, the other way around for max's)

As another thing, the logic for this part could be made a *lot*
clearer by rearranging the code and commenting. I'd do something like:

bool is_redundant = false /* whether this operand will never affect
the final value of the min-max tree */

if (is_min) {
/* if this operand will always be greater than the other one, it's
redundant */
if (limit[i].low > limit[1 - i].high)
   is_redundant = true;

/* if this operand is always greater than baserange, then even if
it's smaller than the other one it'll get clamped so it's redundant */
if (limit[i].low > baserange.high)
   is_redundant = true;
} else {
... the exact same logic mirrored ...
}

- Recurse into the subexpressions, computing the new baserange.

What I think we should do instead is change prune_expression() to also
return the range for the expression (it's now returning two things, so
one would have to be passed via a class variable), so it would look
like:

- Pass in the base range
- If this is a constant, return ourself and the range with low == high
- Recurse into both subexpressions, setting both the range (limits[i])
and the new subexpression
- If one of the subexpressions is redundant, return the other
subexpression and its range
- Otherwise, return ourself and the combination of the ranges

This will allow us to do the recursion only once, instead of once in
get_range() and once in prune_expression(), which will make things
simpler and faster.



You mean have only prune_expression(), cut out get_range()?

I tried hard to have this recurse only once and it looks impossible to 
me. Consider this (hopefully this ascii art gets through fine):


        max
       /   \
    max     max
   /   \   /   \
  3     a b     2

(If ascii art failed, it's max(max(3, a), max(b, 2)) )

a and b are variables, 2 and 3 constants. 2 is to be dropped from the 
right subtree of the top max, but for that we need the 3 from the left 
subtree. prune_expression() on the left subtree will get us the 3 as the 
limit, which correctly drops the 2 when recursed to the right subtree. 
What about if 3 and 2 are swapped in the tree?


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/9] glsl: Refactor the python test case generator

2014-08-18 Thread Petri Latvala

On 08/13/2014 01:51 AM, Dylan Baker wrote:

On Tuesday, July 29, 2014 12:36:33 PM Petri Latvala wrote:

Move the IR sexp builder helpers and test script creation parts of
tests/lower_jumps/create_test_cases.py into tests/test_case_generator.py

No functional changes.

Signed-off-by: Petri Latvala petri.latv...@intel.com
---
  src/glsl/tests/lower_jumps/create_test_cases.py | 336 +++-
  src/glsl/tests/test_case_generator.py   | 293 +
  2 files changed, 334 insertions(+), 295 deletions(-)
  create mode 100644 src/glsl/tests/test_case_generator.py

diff --git a/src/glsl/tests/lower_jumps/create_test_cases.py 
b/src/glsl/tests/lower_jumps/create_test_cases.py
index 3be1079..9783627 100644
--- a/src/glsl/tests/lower_jumps/create_test_cases.py
+++ b/src/glsl/tests/lower_jumps/create_test_cases.py
@@ -27,278 +27,9 @@ import re
  import subprocess
  import sys
  
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access to sexps.py, which is in parent dir

+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access 
to sexps.py and test_case_generator.py, which are in parent dir
  from sexps import *
-
-def make_test_case(f_name, ret_type, body):
-Create a simple optimization test case consisting of a single
-function with the given name, return type, and body.
-
-Global declarations are automatically created for any undeclared
-variables that are referenced by the function.  All undeclared
-variables are assumed to be floats.
-
-check_sexp(body)
-declarations = {}
-def make_declarations(sexp, already_declared = ()):
-if isinstance(sexp, list):
-if len(sexp) == 2 and sexp[0] == 'var_ref':
-if sexp[1] not in already_declared:
-declarations[sexp[1]] = [
-'declare', ['in'], 'float', sexp[1]]
-elif len(sexp) == 4 and sexp[0] == 'assign':
-assert sexp[2][0] == 'var_ref'
-if sexp[2][1] not in already_declared:
-declarations[sexp[2][1]] = [
-'declare', ['out'], 'float', sexp[2][1]]
-make_declarations(sexp[3], already_declared)
-else:
-already_declared = set(already_declared)
-for s in sexp:
-if isinstance(s, list) and len(s) >= 4 and \
-s[0] == 'declare':
-already_declared.add(s[3])
-else:
-make_declarations(s, already_declared)
-make_declarations(body)
-return declarations.values() + \
-[['function', f_name, ['signature', ret_type, ['parameters'], body]]]
-
-
-# The following functions can be used to build expressions.
-
-def const_float(value):
-Create an expression representing the given floating point value.
-return ['constant', 'float', ['{0:.6f}'.format(value)]]
-
-def const_bool(value):
-Create an expression representing the given boolean value.
-
-If value is not a boolean, it is converted to a boolean.  So, for
-instance, const_bool(1) is equivalent to const_bool(True).
-
-return ['constant', 'bool', ['{0}'.format(1 if value else 0)]]
-
-def gt_zero(var_name):
-Create Construct the expression var_name > 0
-return ['expression', 'bool', '>', ['var_ref', var_name], const_float(0)]
-
-
-# The following functions can be used to build complex control flow
-# statements.  All of these functions return statement lists (even
-# those which only create a single statement), so that statements can
-# be sequenced together using the '+' operator.
-
-def return_(value = None):
-Create a return statement.
-if value is not None:
-return [['return', value]]
-else:
-return [['return']]
-
-def break_():
-Create a break statement.
-return ['break']
-
-def continue_():
-Create a continue statement.
-return ['continue']
-
-def simple_if(var_name, then_statements, else_statements = None):
-Create a statement of the form
-
-if (var_name > 0.0) {
-   then_statements
-} else {
-   else_statements
-}
-
-else_statements may be omitted.
-
-if else_statements is None:
-else_statements = []
-check_sexp(then_statements)
-check_sexp(else_statements)
-return [['if', gt_zero(var_name), then_statements, else_statements]]
-
-def loop(statements):
-Create a loop containing the given statements as its loop
-body.
-
-check_sexp(statements)
-return [['loop', statements]]
-
-def declare_temp(var_type, var_name):
-Create a declaration of the form
-
-(declare (temporary) var_type var_name)
-
-return [['declare', ['temporary'], var_type, var_name]]
-
-def assign_x(var_name, value):
-Create a statement that assigns value to the variable
-var_name.  The assignment uses the mask (x).
-
-

Re: [Mesa-dev] [PATCH 9/9] glsl: Add tests for minmax prune

2014-08-18 Thread Petri Latvala

On 08/13/2014 01:59 AM, Dylan Baker wrote:

On Tuesday, July 29, 2014 12:36:39 PM Petri Latvala wrote:

tests/minmax/create_test_cases.py generates the following tests:

multiple_min*.opt_test:
  Construct a tree of min expressions for all permutations of a var_ref
  and three constants. They should all optimize to a single min with
  the variable and the smallest constant.
multiple_max*.opt_test:
  Same as above, for max.
mid3opt*.opt_test:
  Test that code generated from a mid3() for two constants and a
  var_ref optimizes to a single max and a single min.
mixed_vectors*.opt_test:
  Test that the optimization pass doesn't modify expression trees with
  constant vectors where some components compare as less, some as
  greater.

Signed-off-by: Petri Latvala petri.latv...@intel.com
---
  src/glsl/tests/minmax/.gitignore   |   3 +
  src/glsl/tests/minmax/create_test_cases.py | 151 +
  2 files changed, 154 insertions(+)
  create mode 100644 src/glsl/tests/minmax/.gitignore
  create mode 100644 src/glsl/tests/minmax/create_test_cases.py

diff --git a/src/glsl/tests/minmax/.gitignore b/src/glsl/tests/minmax/.gitignore
new file mode 100644
index 000..e98df62
--- /dev/null
+++ b/src/glsl/tests/minmax/.gitignore
@@ -0,0 +1,3 @@
+*.opt_test
+*.expected
+*.out
diff --git a/src/glsl/tests/minmax/create_test_cases.py 
b/src/glsl/tests/minmax/create_test_cases.py
new file mode 100644
index 000..4f78980
--- /dev/null
+++ b/src/glsl/tests/minmax/create_test_cases.py
@@ -0,0 +1,151 @@
+# coding=utf-8
+#
+# Copyright © 2014 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+import os
+import os.path
+import re
+import subprocess
+import sys
+import itertools

This comment applies to all the patches.
You're importing a bunch of modules you're not using, you should remove
any that are not used.

In this file os.path, re, and subprocess are not used.


Oh, yes, leftovers from the refactoring. Fix inc.


+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+from sexps import *
+from test_case_generator import *
+
+def test_multiple_max():
+doc_string = """Test that multiple constants in multiple max expressions are reduced to a 
single max."""

What is this? If it's a docstring it's not assigned, it's just a triple
quoted string at the start of the function or class. Fix this for the
other functions as well


I followed the convention from the jump lowering tests. It's the string 
printed to the generated test script. I'll fix the single-line strings 
to normal quotes.



+
+operands = [const_float(1),
+const_float(2),
+const_float(3),
+['var_ref', 'a']]
+
+c = 1
+for ops in itertools.permutations(operands):
+maxtree1 = reduce(lambda a, b: max_(a, b, 'float'), ops)
+maxtree2 = reduce(lambda a, b: max_(b, a, 'float'), ops)
+
+expected = max_(const_float(3), ['var_ref', 'a'], 'float')
+
+input_sexp = make_test_case('main', 'void', (
+assign_x('b', maxtree1) +
+assign_x('c', maxtree2)
+))
+expected_sexp = make_test_case('main', 'void', (
+assign_x('b', expected) +
+assign_x('c', expected)
+))
+
+create_test_case(doc_string, input_sexp, expected_sexp, 
'multiple_max{0}'.format(c), 'do_minmax_prune')
+c += 1
+
+def test_multiple_min():
+doc_string = """Test that multiple constants in multiple min expressions are reduced to a 
single min."""
+
+operands = [const_float(1),
+const_float(2),
+const_float(3),
+['var_ref', 'a']]
+
+c = 1
+for ops in itertools.permutations(operands):
+mintree1 = reduce(lambda a, b: min_(a, b, 'float'), ops)
+mintree2 = reduce(lambda a, b: min_(b, a, 'float'), ops)
+
+expected = min_(const_float(1), 

Re: [Mesa-dev] [PATCH 1/2] c11/threads: add missing brackets around _MTX_INITIALIZER_NP

2014-08-18 Thread Jose Fonseca

On 04/08/14 18:24, Emil Velikov wrote:

On 02/08/14 02:51, Emil Velikov wrote:

On 02/08/14 00:26, Ian Romanick wrote:

On 08/01/2014 09:41 AM, Emil Velikov wrote:

... for win32 builds. Spotted this warning when I've imported the
library into waffle, and gave mingw-w64-gcc a bash at compiling it.

src/waffle/core/wcore_display.c:37:5: warning: missing braces around
initializer [-Wmissing-braces]
static mtx_t mutex = _MTX_INITIALIZER_NP;
^

Signed-off-by: Emil Velikov emil.l.veli...@gmail.com
---
  include/c11/threads_win32.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/c11/threads_win32.h b/include/c11/threads_win32.h
index 5298a84..35880ba 100644
--- a/include/c11/threads_win32.h
+++ b/include/c11/threads_win32.h
@@ -85,7 +85,7 @@ Configuration macro:
  #define TSS_DTOR_ITERATIONS 1

  // FIXME: temporary non-standard hack to ease transition
-#define _MTX_INITIALIZER_NP {(PCRITICAL_SECTION_DEBUG)-1, -1, 0, 0, 0, 0}
+#define _MTX_INITIALIZER_NP {{(PCRITICAL_SECTION_DEBUG)-1, -1, 0, 0, 0, 0}}


This is because CRITICAL_SECTION is actually a typedef of a pointer to
some structure type, so it needs to be initialized like an array of
structures.  Yeah?


I wish I could agree.

CRITICAL_SECTION is a typedef of a typedef'ed struct (no pointers yet) where
the first member is a struct *, as seen below.

typedef B CRITICAL_SECTION;
typedef struct A {
struct *bla;
...
} B;

I remember spending a few hours reading and experimenting with this and every
way I looked at it current code seems sane. In the end I've smashed the
brackets not to pollute the build log and carried on with other stuff :) Don't
think I have checked if MSVC complained about the issue though. Will give it a
try next time I reboot.



I searched a bit on the net, and I could not find a single example of
initializing a win32 CRITICAL_SECTION this way.  Is this a good idea?
The FIXME comment doesn't inspire confidence...


Same here. AFAICS one should init the mutex via InitializeCriticalSection or
InitializeCriticalSectionAndSpinCount. Either of which is very Win32 specific
and not at all portable. Perhaps Jose (the author) can share some more
insights on the topic ?


Fun stuff. MSVC produces _no_ warnings with or without this patch. Not sure
what exactly is happening here, perhaps I'm hitting some obscure mingw-w64
(gcc?) bug ?


Yes, it's weird.  {(PCRITICAL_SECTION_DEBUG)-1, -1, 0, 0, 0, 0} always 
worked well for me, both MSVC and Mingw, without warnings.


Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] c11/threads: add missing brackets around _MTX_INITIALIZER_NP

2014-08-18 Thread Jose Fonseca

On 02/08/14 00:26, Ian Romanick wrote:

On 08/01/2014 09:41 AM, Emil Velikov wrote:

... for win32 builds. Spotted this warning when I've imported the
library into waffle, and gave mingw-w64-gcc a bash at compiling it.

src/waffle/core/wcore_display.c:37:5: warning: missing braces around
initializer [-Wmissing-braces]
static mtx_t mutex = _MTX_INITIALIZER_NP;
^

Signed-off-by: Emil Velikov emil.l.veli...@gmail.com
---
  include/c11/threads_win32.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/c11/threads_win32.h b/include/c11/threads_win32.h
index 5298a84..35880ba 100644
--- a/include/c11/threads_win32.h
+++ b/include/c11/threads_win32.h
@@ -85,7 +85,7 @@ Configuration macro:
  #define TSS_DTOR_ITERATIONS 1

  // FIXME: temporary non-standard hack to ease transition
-#define _MTX_INITIALIZER_NP {(PCRITICAL_SECTION_DEBUG)-1, -1, 0, 0, 0, 0}
+#define _MTX_INITIALIZER_NP {{(PCRITICAL_SECTION_DEBUG)-1, -1, 0, 0, 0, 0}}


This is because CRITICAL_SECTION is actually a typedef of a pointer to
some structure type, so it needs to be initialized like an array of
structures.  Yeah?

I searched a bit on the net, and I could not find a single example of
initializing a win32 CRITICAL_SECTION this way.  Is this a good idea?


It's unavoidable.

src/gallium/auxiliary/os/os_thread.h used to have a comment with a link 
to this:


  http://locklessinc.com/articles/pthreads_on_windows/

But it got lost with the c11/threads.h introduction.

It's probably worth adding this link once again.


The FIXME comment doesn't inspire confidence...


That comment is because static initializers are not part of the C 
standard.   This works fine from a Win32 POV.


In other words, we should stop using static initializers, not because 
they cause problems on Windows, but because it's not part of the C11 
standard, hence we'll be in trouble the day we want to use system 
provided C11 headers instead of our own.


include/c11/threads_posix.h has the same:

  // FIXME: temporary non-standard hack to ease transition
  #define _MTX_INITIALIZER_NP PTHREAD_MUTEX_INITIALIZER


Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] c11/threads: correct assertion

2014-08-18 Thread Jose Fonseca

On 02/08/14 00:16, Ian Romanick wrote:

On 08/01/2014 09:41 AM, Emil Velikov wrote:

We should assert when either the function or the flag pointer
is null or we'll end up with a null reference a few lines later.

Currently unused by mesa thus it has gone unnoticed.

Signed-off-by: Emil Velikov emil.l.veli...@gmail.com
---
  include/c11/threads_win32.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/c11/threads_win32.h b/include/c11/threads_win32.h
index 35880ba..a95cb78 100644
--- a/include/c11/threads_win32.h
+++ b/include/c11/threads_win32.h
@@ -296,7 +296,7 @@ static void impl_tss_dtor_invoke()
  static inline void
  call_once(once_flag *flag, void (*func)(void))
  {
-assert(!flag && !func);
+assert(flag && func);


This is why I generally prefer to compare pointers to NULL instead of
just using them as booleans.

 assert(flag == NULL && func == NULL);

is much more obviously wrong than the current code.  Either way, this
patch is

Reviewed-by: Ian Romanick ian.d.roman...@intel.com


Yep. My bad.

I added this assertion to see if I could silence MSVC static code 
analyzer's warnings.  It was untested.  And it didn't silence the 
warnings (probably because of this) so I shouldn't have committed it at all.


Jose

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 18/20] i965/fs: Preserve CFG in the SEL peephole.

2014-08-18 Thread Pohjolainen, Topi
On Thu, Jul 24, 2014 at 07:54:25PM -0700, Matt Turner wrote:
 ---
  src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp | 15 +--
  1 file changed, 9 insertions(+), 6 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
 index d64cd98..f609138 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
 @@ -212,23 +212,26 @@ fs_visitor::opt_peephole_sel()
if (brw->gen == 6 && if_inst->conditional_mod) {
   fs_inst *cmp_inst = CMP(reg_null_d, if_inst-src[0], 
 if_inst-src[1],
   if_inst-conditional_mod);
 - if_inst-insert_before(cmp_inst);
 + if_inst-insert_before(block, cmp_inst);
}
  
 +  bblock_t *then_block = (bblock_t *)block-link.next;
 +  bblock_t *else_block = (bblock_t *)block-else_block-link.next;

Isn't this a pointer to the endif-block? I thought else-block would be

 bblock_t *else_block = (bblock_t *)block->then_block->link.next;

or simply just

 bblock_t *else_block = (bblock_t *)block->else_block;

 +
for (int i = 0; i < movs; i++) {
   if (mov_imm_inst[i])
 -if_inst-insert_before(mov_imm_inst[i]);
 - if_inst-insert_before(sel_inst[i]);
 +if_inst-insert_before(block, mov_imm_inst[i]);
 + if_inst-insert_before(block, sel_inst[i]);
  
 - then_mov[i]-remove();
 - else_mov[i]-remove();
 + then_mov[i]-remove(then_block);
 + else_mov[i]-remove(else_block);
}
  
progress = true;
 }
  
 if (progress)
 -  invalidate_live_intervals();
 +  invalidate_live_intervals(false);
  
 return progress;
  }
 -- 
 1.8.5.5
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] squash! glsl: Optimize min/max expression trees

2014-08-18 Thread Connor Abbott
On Mon, Aug 18, 2014 at 9:26 AM, Petri Latvala petri.latv...@intel.com wrote:
 On 08/14/2014 11:00 AM, Connor Abbott wrote:



 Another thing I'd like to see is to change minmax_range to call things
 low and high instead of range[0] and range[1]. This helps
 readability, and the tricks with indirect addressing that having an
 array lets you do are things we really shouldn't be doing anyways
 because it's hard to follow.


 Sure, changing.



 As I mentioned before, swizzle_if_required() should probably use the
 ir_builder swizzle helpers.


 I copied swizzle_if_required from opt_algebraic. I'll squeeze in a patch
 that changes that as well. Or actually just refactor the function to live
 somewhere where it's reusable.




 I'm still not convinced that the algorithm is the best way to go about
 it. Right now, AFAICT, we do something like:

 - Pass in a base range, which is what the min's and max's above us
 in the tree will clamp the value we return to
 - Get the ranges for each subexpression (this is a recursive call)
 - Check and see if each operand is unnecessary (i.e. its range is
 strictly greater than the base range or strictly greater than the
 other argument for mins, the other way around for max's)

 As another thing, the logic for this part could be made a *lot*
 clearer by rearranging the code and commenting. I'd do something like:

 bool is_redundant = false /* whether this operand will never affect
 the final value of the min-max tree */

 if (is_min) {
 /* if this operand will always be greater than the other one, it's
 redundant */
 if (limit[i].low > limit[1 - i].high)
is_redundant = true;

 /* if this operand is always greater than baserange, then even if
 it's smaller than the other one it'll get clamped so it's redundant */
 if (limit[i].low > baserange.high)
is_redundant = true;
 } else {
 ... the exact same logic mirrored ...
 }

 - Recurse into the subexpressions, computing the new baserange.

 What I think we should do instead is change prune_expression() to also
 return the range for the expression (it's now returning two things, so
 one would have to be passed via a class variable), so it would look
 like:

 - Pass in the base range
 - If this is a constant, return ourself and the range with low == high
 - Recurse into both subexpressions, setting both the range (limits[i])
 and the new subexpression
 - If one of the subexpressions is redundant, return the other
 subexpression and its range
 - Otherwise, return ourself and the combination of the ranges

 This will allow us to do the recursion only once, instead of once in
 get_range() and once in prune_expression(), which will make things
 simpler and faster.


 You mean have only prune_expression(), cut out get_range()?

 I tried hard to have this recurse only once and it looks impossible to me.
 Consider this (hopefully this ascii art gets through fine):

         max
        /   \
     max     max
    /   \   /   \
   3     a b     2

 (If ascii art failed, it's max(max(3, a), max(b, 2)) )

 a and b are variables, 2 and 3 constants. 2 is to be dropped from the right
 subtree of the top max, but for that we need the 3 from the left subtree.
 prune_expression() on the left subtree will get us the 3 as the limit, which
 correctly drops the 2 when recursed to the right subtree. What about if 3
 and 2 are swapped in the tree?


Ah, I see. Can you add a comment somewhere (perhaps before the call to
get_range()) that explains this all so some dummy like me doesn't
later ask why we recurse twice? Something like:

Recurse to get the ranges for each of the subtrees of this
expression. We need to do this as a separate step because we need to
know the ranges of each of the subtrees before we prune either one.
Consider something like this:

(your ASCII art)

We would like to prune away the max on the bottom-right, but to do so
we need to know the range of the expression on the left beforehand,
and there's no guarantee that we will visit either subtree in a
particular order.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa

2014-08-18 Thread Jose Fonseca

On 18/08/14 14:21, Marek Olšák wrote:

On Mon, Aug 18, 2014 at 2:44 PM, Roland Scheidegger srol...@vmware.com wrote:

Am 16.08.2014 02:12, schrieb Connor Abbott:

I know what you might be thinking right now. Wait, *another* IR? Don't
we already have like 5 of those, not counting all the driver-specific
ones? Isn't this stuff complicated enough already? Well, there are some
pretty good reasons to start afresh (again...). In the years we've been
using GLSL IR, we've come to realize that, in fact, it's not what we
want *at all* to do optimizations on. Ian has done a talk at FOSDEM that
highlights some of the problems they've run into:

https://urldefense.proofpoint.com/v1/url?u=https://video.fosdem.org/2014/H1301_Cornil/Saturday/Three_Years_Experience_with_a_Treelike_Shader_IR.webmk=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0Ar=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0Am=iXhCeAYmidPDc1lFo757Cc9V0PvWAN4n3X%2Fw%2B%2F7Lx%2Fs%3D%0As=f103fb26bf53eee64318a490517d1ee9ab88ecd29fcdbe49d54b5a27e7581c2e

But here's the summary:

* GLSL IR is way too much of a memory hog, since it has to make a new
variable for each temporary the compiler creates and then each time you
want to dereference that temporary you need to create an
ir_dereference_variable that points to it which is also very
cache-unfriendly (downright cache-mean!).

* The expression trees were originally added so that we could do
pattern matching to automatically optimize things, but this turned out
to be both very difficult to do and not very helpful. Instead, all it
does is add more complexity to the IR without much benefit - with SSA or
having proper use-def chains, we could get back what the trees give us
while also being able to do lots more optimizations.

* We don't have the concept of basic blocks in GLSL IR, which makes a
lot of optimizations harder because they were originally designed with
basic blocks in mind - take, for example, my SSA series. I had to map a
whole lot of concepts that were based on the control flow graph to this
tree of statements that GLSL IR uses, and the end result wound up
looking nothing at all like the original paper. This problem gets even
worse for things like e.g. Global Code Motion that depend upon having
the dominance tree.

I originally wanted to modify GLSL IR to fix these problems by adding
new instruction types that would address these issues and then
converting back and forth between the old and the new form, but I
realized that fixing all the problems would basically mean a complete
rewrite - and if that's the case, then why don't we start from scratch?
So I took Ken's suggestions and started designing, and then at Intel
over the summer started implementing, a completely new IR which I call
NIR that's at a lower level than GLSL IR, but still high-level enough to
be mostly device-independent (different drivers may have different
passes and different ways of lowering e.g.  matrix multiplies) so that
we can do generic optimizations on it. Having support for SSA from the
beginning was also a must, because lots of optimisations that we really
want for cleaning up DX9-translated games are either a lot easier in or
made possible by SSA. I also made the decision for it to be typeless,
because that's what the cool kids are all doing :) and for a
lower-level, flat IR it seemed like the thing to do (it could have gone
either way, though). So the key design points of NIR (pronounced either
like "near" as in "NIR is near!" or to rhyme with "burr") are:

* It's flat (no expression trees)

* It's typeless

* Modifiers (abs, negate, saturate), swizzles, and write masks are part
of ALU instructions

* It includes enough GLSL-like things (variables that you can load from
or store to, function calls) to be hardware-agnostic (although we don't
have a way to represent matrix multiplies right now, but that could
easily be added) to be able to do optimizations at a high level, while
having lowering passes that convert variables to registers and
input/output/uniform loads/stores that will open up more opportunities
for optimization and save memory while being more hardware-specific.

* Control flow consists of a tree of if statements and loops, like in
GLSL IR, except the leaves of the tree are now basic blocks instead of
instructions. Also, each basic block keeps track of its successors and
predecessors, so the control flow graph is explicit in the IR.

* SSA is natively supported, and SSA uses point directly to the SSA
definition, which means that the use-def chains are always there, and
def-use chains are kept by tracking the set of all uses for each
definition.

* It's written in C.

(see the README in patch 3 and nir.h in patch 4 for more details)

Some things that are missing or could be improved:

* There's currently no alias tracking for inputs, outputs, and uniforms.
This is especially important for uniforms because we don't pack them
like we pack inputs and outputs.

* We need a way to represent matrix multiplies so that we can do

Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa

2014-08-18 Thread Connor Abbott
On Mon, Aug 18, 2014 at 8:44 AM, Roland Scheidegger srol...@vmware.com wrote:
 Am 16.08.2014 02:12, schrieb Connor Abbott:
 I know what you might be thinking right now. Wait, *another* IR? Don't
 we already have like 5 of those, not counting all the driver-specific
 ones? Isn't this stuff complicated enough already? Well, there are some
 pretty good reasons to start afresh (again...). In the years we've been
 using GLSL IR, we've come to realize that, in fact, it's not what we
 want *at all* to do optimizations on. Ian has done a talk at FOSDEM that
 highlights some of the problems they've run into:

 https://urldefense.proofpoint.com/v1/url?u=https://video.fosdem.org/2014/H1301_Cornil/Saturday/Three_Years_Experience_with_a_Treelike_Shader_IR.webmk=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0Ar=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0Am=iXhCeAYmidPDc1lFo757Cc9V0PvWAN4n3X%2Fw%2B%2F7Lx%2Fs%3D%0As=f103fb26bf53eee64318a490517d1ee9ab88ecd29fcdbe49d54b5a27e7581c2e

 But here's the summary:

 * GLSL IR is way too much of a memory hog, since it has to make a new
 variable for each temporary the compiler creates and then each time you
 want to dereference that temporary you need to create an
 ir_dereference_variable that points to it which is also very
 cache-unfriendly (downright cache-mean!).

 * The expression trees were originally added so that we could do
 pattern matching to automatically optimize things, but this turned out
 to be both very difficult to do and not very helpful. Instead, all it
 does is add more complexity to the IR without much benefit - with SSA or
 having proper use-def chains, we could get back what the trees give us
 while also being able to do lots more optimizations.

 * We don't have the concept of basic blocks in GLSL IR, which makes a
 lot of optimizations harder because they were originally designed with
 basic blocks in mind - take, for example, my SSA series. I had to map a
 whole lot of concepts that were based on the control flow graph to this
 tree of statements that GLSL IR uses, and the end result wound up
 looking nothing at all like the original paper. This problem gets even
 worse for things like e.g. Global Code Motion that depend upon having
 the dominance tree.

 I originally wanted to modify GLSL IR to fix these problems by adding
 new instruction types that would address these issues and then
 converting back and forth between the old and the new form, but I
 realized that fixing all the problems would basically mean a complete
 rewrite - and if that's the case, then why don't we start from scratch?
 So I took Ken's suggestions and started designing, and then at Intel
 over the summer started implementing, a completely new IR which I call
 NIR that's at a lower level than GLSL IR, but still high-level enough to
 be mostly device-independent (different drivers may have different
 passes and different ways of lowering e.g.  matrix multiplies) so that
 we can do generic optimizations on it. Having support for SSA from the
 beginning was also a must, because lots of optimisations that we really
 want for cleaning up DX9-translated games are either a lot easier in or
 made possible by SSA. I also made the decision for it to be typeless,
 because that's what the cool kids are all doing :) and for a
 lower-level, flat IR it seemed like the thing to do (it could have gone
 either way, though). So the key design points of NIR (pronounced either
 like "near" as in "NIR is near!" or to rhyme with "burr") are:

 * It's flat (no expression trees)

 * It's typeless

 * Modifiers (abs, negate, saturate), swizzles, and write masks are part
 of ALU instructions

 * It includes enough GLSL-like things (variables that you can load from
 or store to, function calls) to be hardware-agnostic (although we don't
 have a way to represent matrix multiplies right now, but that could
 easily be added) to be able to do optimizations at a high level, while
 having lowering passes that convert variables to registers and
 input/output/uniform loads/stores that will open up more opportunities
 for optimization and save memory while being more hardware-specific.

 * Control flow consists of a tree of if statements and loops, like in
 GLSL IR, except the leaves of the tree are now basic blocks instead of
 instructions. Also, each basic block keeps track of its successors and
 predecessors, so the control flow graph is explicit in the IR.

 * SSA is natively supported, and SSA uses point directly to the SSA
 definition, which means that the use-def chains are always there, and
 def-use chains are kept by tracking the set of all uses for each
 definition.

 * It's written in C.

 (see the README in patch 3 and nir.h in patch 4 for more details)

 Some things that are missing or could be improved:

 * There's currently no alias tracking for inputs, outputs, and uniforms.
 This is especially important for uniforms because we don't pack them
 like we pack inputs and outputs.

 * We need a way to represent 

Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa

2014-08-18 Thread Connor Abbott
On Mon, Aug 18, 2014 at 11:47 AM, Jose Fonseca jfons...@vmware.com wrote:
 On 18/08/14 14:21, Marek Olšák wrote:

 On Mon, Aug 18, 2014 at 2:44 PM, Roland Scheidegger srol...@vmware.com
 wrote:

 Am 16.08.2014 02:12, schrieb Connor Abbott:

 I know what you might be thinking right now. Wait, *another* IR? Don't
 we already have like 5 of those, not counting all the driver-specific
 ones? Isn't this stuff complicated enough already? Well, there are some
 pretty good reasons to start afresh (again...). In the years we've been
 using GLSL IR, we've come to realize that, in fact, it's not what we
 want *at all* to do optimizations on. Ian has done a talk at FOSDEM that
 highlights some of the problems they've run into:


 https://urldefense.proofpoint.com/v1/url?u=https://video.fosdem.org/2014/H1301_Cornil/Saturday/Three_Years_Experience_with_a_Treelike_Shader_IR.webmk=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0Ar=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0Am=iXhCeAYmidPDc1lFo757Cc9V0PvWAN4n3X%2Fw%2B%2F7Lx%2Fs%3D%0As=f103fb26bf53eee64318a490517d1ee9ab88ecd29fcdbe49d54b5a27e7581c2e

 But here's the summary:

 * GLSL IR is way too much of a memory hog, since it has to make a new
 variable for each temporary the compiler creates and then each time you
 want to dereference that temporary you need to create an
 ir_dereference_variable that points to it which is also very
 cache-unfriendly (downright cache-mean!).

 * The expression trees were originally added so that we could do
 pattern matching to automatically optimize things, but this turned out
 to be both very difficult to do and not very helpful. Instead, all it
 does is add more complexity to the IR without much benefit - with SSA or
 having proper use-def chains, we could get back what the trees give us
 while also being able to do lots more optimizations.

 * We don't have the concept of basic blocks in GLSL IR, which makes a
 lot of optimizations harder because they were originally designed with
 basic blocks in mind - take, for example, my SSA series. I had to map a
 whole lot of concepts that were based on the control flow graph to this
 tree of statements that GLSL IR uses, and the end result wound up
 looking nothing at all like the original paper. This problem gets even
 worse for things like e.g. Global Code Motion that depend upon having
 the dominance tree.

 I originally wanted to modify GLSL IR to fix these problems by adding
 new instruction types that would address these issues and then
 converting back and forth between the old and the new form, but I
 realized that fixing all the problems would basically mean a complete
 rewrite - and if that's the case, then why don't we start from scratch?
 So I took Ken's suggestions and started designing, and then at Intel
 over the summer started implementing, a completely new IR which I call
 NIR that's at a lower level than GLSL IR, but still high-level enough to
 be mostly device-independent (different drivers may have different
 passes and different ways of lowering e.g.  matrix multiplies) so that
 we can do generic optimizations on it. Having support for SSA from the
 beginning was also a must, because lots of optimisations that we really
 want for cleaning up DX9-translated games are either a lot easier in or
 made possible by SSA. I also made the decision for it to be typeless,
 because that's what the cool kids are all doing :) and for a
 lower-level, flat IR it seemed like the thing to do (it could have gone
 either way, though). So the key design points of NIR (pronounced either
 like "near" as in "NIR is near!" or to rhyme with "burr") are:

 * It's flat (no expression trees)

 * It's typeless

 * Modifiers (abs, negate, saturate), swizzles, and write masks are part
 of ALU instructions

 * It includes enough GLSL-like things (variables that you can load from
 or store to, function calls) to be hardware-agnostic (although we don't
 have a way to represent matrix multiplies right now, but that could
 easily be added) to be able to do optimizations at a high level, while
 having lowering passes that convert variables to registers and
 input/output/uniform loads/stores that will open up more opportunities
 for optimization and save memory while being more hardware-specific.

 * Control flow consists of a tree of if statements and loops, like in
 GLSL IR, except the leaves of the tree are now basic blocks instead of
 instructions. Also, each basic block keeps track of its successors and
 predecessors, so the control flow graph is explicit in the IR.

 * SSA is natively supported, and SSA uses point directly to the SSA
 definition, which means that the use-def chains are always there, and
 def-use chains are kept by tracking the set of all uses for each
 definition.

 * It's written in C.

 (see the README in patch 3 and nir.h in patch 4 for more details)

 Some things that are missing or could be improved:

 * There's currently no alias tracking for inputs, outputs, and uniforms.
 This is especially 

Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa

2014-08-18 Thread Connor Abbott
On Mon, Aug 18, 2014 at 4:32 AM, Michel Dänzer mic...@daenzer.net wrote:
 On 16.08.2014 09:12, Connor Abbott wrote:
 I know what you might be thinking right now. Wait, *another* IR? Don't
 we already have like 5 of those, not counting all the driver-specific
 ones? Isn't this stuff complicated enough already? Well, there are some
 pretty good reasons to start afresh (again...). In the years we've been
 using GLSL IR, we've come to realize that, in fact, it's not what we
 want *at all* to do optimizations on.

 Did you evaluate using LLVM IR instead of inventing yet another one?


 --
 Earthling Michel Dänzer|  http://www.amd.com
 Libre software enthusiast  |Mesa and X developer

Yes. See

http://lists.freedesktop.org/archives/mesa-dev/2014-February/053502.html

and

http://lists.freedesktop.org/archives/mesa-dev/2014-February/053522.html
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa

2014-08-18 Thread Ilia Mirkin
On Mon, Aug 18, 2014 at 12:25 PM, Connor Abbott cwabbo...@gmail.com wrote:
 On Mon, Aug 18, 2014 at 11:47 AM, Jose Fonseca jfons...@vmware.com wrote:
 On 18/08/14 14:21, Marek Olšák wrote:
 Once these are in place, all development effort to go on to
 improving/leveraging the new IR.  We could deprecate TGSI when it would have
 few users.

 Also, switching to LLVM, NIR, or some other IR that uses SSA (or at
 least modifying TGSI to support it) seems like something that's really
 necessary for the Gallium folks. Soon, considering most backends
 already use SSA in one form or another, the situation will look like:

 GLSL IR -> NIR -> NIR with SSA -> optimizations -> NIR without SSA ->
 TGSI -> backend without SSA -> backend with SSA

 So backends would have to duplicate the into-SSA logic and every
 shader would have to pay the penalty of being converted out of and
 then back into SSA thanks to TGSI not supporting it.

Looking at it another way, perhaps we should just accept that backends
will want to do their own things, and try to minimize the damage by
doing

GLSL IR -> transport IR -> backend

Are you envisioning a world where every backend uses NIR, and uses
some sort of shared register allocation/spilling/etc logic,
configurable instruction lists, pluggable with lowering passes? By
then you've invented LLVM...

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 0/6] Implement ARB_conditional_render_inverted

2014-08-18 Thread Ilia Mirkin
On Sun, Aug 17, 2014 at 7:38 PM, Tobias Klausmann
tobias.johannes.klausm...@mni.thm.de wrote:
 This patch series adds support for ARB_conditional_render_inverted to nvc0,
 softpipe and llvmpipe.

 V2:
  - Add missing _mesa_BeginConditionalRender() parts to series
  - Fix nvc0 blit and inverted rendering
  - Fix relnotes
  - Enable for softpipe and llvmpipe
  - Rebase on top of current HEAD
 V3:
  - Only allow the new modes if the extension is enabled
  - Merge several patches to always have a working tree

 Tobias Klausmann (6):
   mesa: add ARB_conditional_render_inverted flags
   mesa/st: Support ARB_conditional_render_inverted modes
   gallium: Add and handle PIPE_CAP_CONDITIONAL_RENDER_INVERTED

The order of these two patches needs to be swapped, the mesa/st change
depends on the gallium change adding the cap...

   nvc0: Handle ARB_conditional_render_inverted and enable it
   llvmpipe/softpipe: enable ARB_conditional_render_inverted
   docs: Update status of ARB_conditional_render_inverted

  docs/GL3.txt |  2 +-
  docs/relnotes/10.3.html  |  3 ++-
  src/gallium/docs/source/screen.rst   |  2 ++
  src/gallium/drivers/freedreno/freedreno_screen.c |  1 +
  src/gallium/drivers/i915/i915_screen.c   |  1 +
  src/gallium/drivers/ilo/ilo_screen.c |  1 +
  src/gallium/drivers/llvmpipe/lp_screen.c |  2 ++
  src/gallium/drivers/nouveau/nv30/nv30_screen.c   |  1 +
  src/gallium/drivers/nouveau/nv50/nv50_screen.c   |  1 +
  src/gallium/drivers/nouveau/nvc0/nvc0_query.c|  5 ++---
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   |  1 +
  src/gallium/drivers/nouveau/nvc0/nvc0_surface.c  |  4 +++-
  src/gallium/drivers/r300/r300_screen.c   |  1 +
  src/gallium/drivers/r600/r600_pipe.c |  1 +
  src/gallium/drivers/radeonsi/si_pipe.c   |  1 +
  src/gallium/drivers/softpipe/sp_screen.c |  2 ++
  src/gallium/drivers/svga/svga_screen.c   |  1 +
  src/gallium/drivers/vc4/vc4_screen.c |  1 +
  src/gallium/include/pipe/p_defines.h |  1 +
  src/mesa/main/condrender.c   | 10 --
  src/mesa/main/extensions.c   |  1 +
  src/mesa/main/mtypes.h   |  1 +
  src/mesa/state_tracker/st_cb_condrender.c| 20 +++-
  src/mesa/state_tracker/st_extensions.c   |  1 +
  24 files changed, 56 insertions(+), 9 deletions(-)

 --
 1.8.4.5

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 4/6] nvc0: Handle ARB_conditional_render_inverted and enable it

2014-08-18 Thread Ilia Mirkin
On Sun, Aug 17, 2014 at 7:38 PM, Tobias Klausmann
tobias.johannes.klausm...@mni.thm.de wrote:
 Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
 ---
  src/gallium/drivers/nouveau/nvc0/nvc0_query.c   | 5 ++---
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c  | 3 +--
  src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 4 +++-
  3 files changed, 6 insertions(+), 6 deletions(-)

 diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
 b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
 index 50cef1e..71d48f2 100644
 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
 +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
 @@ -542,7 +542,6 @@ nvc0_render_condition(struct pipe_context *pipe,
 struct nouveau_pushbuf *push = nvc0-base.pushbuf;
 struct nvc0_query *q;
 uint32_t cond;
 -   boolean negated = FALSE;
 boolean wait =
mode != PIPE_RENDER_COND_NO_WAIT 
mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
 @@ -561,13 +560,13 @@ nvc0_render_condition(struct pipe_context *pipe,
 /* NOTE: comparison of 2 queries only works if both have completed */
 switch (q-type) {
 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
 -  cond = negated ? NVC0_3D_COND_MODE_EQUAL :
 +  cond = condition ? NVC0_3D_COND_MODE_EQUAL :
 NVC0_3D_COND_MODE_NOT_EQUAL;
wait = TRUE;
break;
 case PIPE_QUERY_OCCLUSION_COUNTER:
 case PIPE_QUERY_OCCLUSION_PREDICATE:
 -  if (likely(!negated)) {
 +  if (likely(!condition)) {
   if (unlikely(q-nesting))
  cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
NVC0_3D_COND_MODE_ALWAYS;
 diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
 b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
 index 7c2f11a..84025ef 100644
 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
 +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
 @@ -167,13 +167,12 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
 pipe_cap param)
 case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
 case PIPE_CAP_TEXTURE_GATHER_SM5:
 case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
 +   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
return 1;
 case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d = NVE4_3D_CLASS) ? 1 : 0;
 case PIPE_CAP_COMPUTE:
return (class_3d == NVE4_3D_CLASS) ? 1 : 0;
 -   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
 - return 0;

 /* unsupported caps */
 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
 diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c 
 b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
 index a29f0cc..622193b 100644
 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
 +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
 @@ -1210,6 +1210,8 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct 
 pipe_blit_info *info)
 int64_t du_dx, dv_dy;
 int i;
 uint32_t mode;
 +   uint32_t cond = nvc0-cond_cond ? NVC0_2D_COND_MODE_EQUAL :
 + NVC0_2D_COND_MODE_NOT_EQUAL;
 uint32_t mask = nv50_blit_eng2d_get_mask(info);
 boolean b;

 @@ -1236,7 +1238,7 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct 
 pipe_blit_info *info)
 }

 if (nvc0-cond_query  info-render_condition_enable)
 -  IMMED_NVC0(push, NVC0_2D(COND_MODE), NVC0_2D_COND_MODE_RES_NON_ZERO);
 +  IMMED_NVC0(push, NVC0_2D(COND_MODE), cond);

This used to always get set to NVC0_2D_COND_MODE_RES_NON_ZERO. Now it
will never be set to that. I think you need to copy the cond selection
logic from nvc0_query a little more faithfully...


 if (mask != 0x) {
IMMED_NVC0(push, NVC0_2D(ROP), 0xca); /* DPSDxax */
 --
 1.8.4.5

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa

2014-08-18 Thread Connor Abbott
On Mon, Aug 18, 2014 at 12:38 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 On Mon, Aug 18, 2014 at 12:25 PM, Connor Abbott cwabbo...@gmail.com wrote:
 On Mon, Aug 18, 2014 at 11:47 AM, Jose Fonseca jfons...@vmware.com wrote:
 On 18/08/14 14:21, Marek Olšák wrote:
 Once these are in place, all development effort to go on to
 improving/leveraging the new IR.  We could deprecate TGSI when it would have
 few users.

 Also, switching to LLVM, NIR, or some other IR that uses SSA (or at
 least modifying TGSI to support it) seems like something that's really
 necessary for the Gallium folks. Soon, considering most backends
 already use SSA in one form or another, the situation will look like:

 GLSL IR -> NIR -> NIR with SSA -> optimizations -> NIR without SSA ->
 TGSI -> backend without SSA -> backend with SSA

 So backends would have to duplicate the into-SSA logic and every
 shader would have to pay the penalty of being converted out of and
 then back into SSA thanks to TGSI not supporting it.

 Looking at it another way, perhaps we should just accept that backends
 will want to do their own things, and try to minimize the damage by
 doing

 GLSL IR -> transport IR -> backend

 Are you envisioning a world where every backend uses NIR, and uses
 some sort of shared register allocation/spilling/etc logic,
 configurable instruction lists, pluggable with lowering passes? By
 then you've invented LLVM...

   -ilia

No, I expect that backends will still want to do their own register
allocation/spilling/scheduling etc. - and besides for that, NIR
supports structured control flow, swizzles and writemasks, modifiers
(abs, negate, saturate), etc. natively in the IR instead of something
that's tacked on or something that drivers have to do themselves. So
no, I'm not re-inventing LLVM. On the other hand, it's entirely
possible for backends to add their own backend-specific opcodes and
intrinsics, and thereby be able to do some amount of lowering and
optimization in NIR. Another reason that backends might want to accept
NIR is so that they can give NIR passes more precise information on
e.g. when to do if-conversion. Again, this is all speculative though -
we'll have to do more of the work before we can find out how we want
to use NIR beyond what I originally wrote it to be, which was a way to
do common optimizations that we couldn't do in GLSL IR.

Connor
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/8] i965/fs: Optimize gl_FrontFacing calculation on Gen6+.

2014-08-18 Thread Anuj Phogat
On Fri, Aug 15, 2014 at 4:43 PM, Matt Turner matts...@gmail.com wrote:
 On Fri, Aug 15, 2014 at 3:26 PM, Anuj Phogat anuj.pho...@gmail.com wrote:
 With comment on patch 3 addressed:
 Patches 1-7 are: Acked-by: Anuj Phogat anuj.pho...@gmail.com

 Thanks for looking over the patches!

 Acked-by is used in the kernel by a maintainer to acknowledge the
 changes to his particular subsystem in a patch that affects many
 subsystems, or for maintainers to say "yeah, looks good to me." We use
 it in Mesa to say "yeah, the idea seems good" without really saying
 that we've reviewed the contents of the patch itself.

 The kernel docs describe what a Reviewed-by really means here [1], but
 basically it's that (1) I read the patch, (2) I'm satisfied with the
 patch, (3) I think the patch is worthwhile, and (4) I'm not making any
 guarantees. :-)

 Can I upgrade your Acked-bys to Reviewed-bys?

Yes. go ahead.

 [1] 
 http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches#n498
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa

2014-08-18 Thread Marek Olšák
On Mon, Aug 18, 2014 at 7:05 PM, Connor Abbott cwabbo...@gmail.com wrote:
 On Mon, Aug 18, 2014 at 12:38 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 Looking at it another way, perhaps we should just accept that backends
 will want to do their own things, and try to minimize the damage by
 doing

 GLSL IR -> transport IR -> backend

 Are you envisioning a world where every backend uses NIR, and uses
 some sort of shared register allocation/spilling/etc logic,
 configurable instruction lists, pluggable with lowering passes? By
 then you've invented LLVM...

   -ilia

 No, I expect that backends will still want to do their own register
 allocation/spilling/scheduling etc. - and besides for that, NIR
 supports structured control flow, swizzles and writemasks, modifiers
 (abs, negate, saturate), etc. natively in the IR instead of something
 that's tacked on or something that drivers have to do themselves. So
 no, I'm not re-inventing LLVM. On the other hand, it's entirely
 possible for backends to add their own backend-specific opcodes and
 intrinsics, and thereby be able to do some amount of lowering and
 optimization in NIR. Another reason that backends might want to accept
 NIR is so that they can give NIR passes more precise information on
 e.g. when to do if-conversion. Again, this is all speculative though -
 we'll have to do more of the work before we can find out how we want
 to use NIR beyond what I originally wrote it to be, which was a way to
 do common optimizations that we couldn't do in GLSL IR.

This sounds good.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa

2014-08-18 Thread Roland Scheidegger
Am 18.08.2014 19:05, schrieb Connor Abbott:
 On Mon, Aug 18, 2014 at 12:38 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 On Mon, Aug 18, 2014 at 12:25 PM, Connor Abbott cwabbo...@gmail.com wrote:
 On Mon, Aug 18, 2014 at 11:47 AM, Jose Fonseca jfons...@vmware.com wrote:
 On 18/08/14 14:21, Marek Olšák wrote:
 Once these are in place, all development effort to go on to
 improving/leveraging the new IR.  We could deprecate TGSI when it would 
 have
 few users.

 Also, switching to LLVM, NIR, or some other IR that uses SSA (or at
 least modifying TGSI to support it) seems like something that's really
 necessary for the Gallium folks. Soon, considering most backends
 already use SSA in one form or another, the situation will look like:

 GLSL IR -> NIR -> NIR with SSA -> optimizations -> NIR without SSA ->
 TGSI -> backend without SSA -> backend with SSA

 So backends would have to duplicate the into-SSA logic and every
 shader would have to pay the penalty of being converted out of and
 then back into SSA thanks to TGSI not supporting it.

 Looking at it another way, perhaps we should just accept that backends
 will want to do their own things, and try to minimize the damage by
 doing

 GLSL IR -> transport IR -> backend

 Are you envisioning a world where every backend uses NIR, and uses
 some sort of shared register allocation/spilling/etc logic,
 configurable instruction lists, pluggable with lowering passes? By
 then you've invented LLVM...

   -ilia
 
 No, I expect that backends will still want to do their own register
 allocation/spilling/scheduling etc. - and besides for that, NIR
 supports structured control flow, swizzles and writemasks, modifiers
 (abs, negate, saturate), etc. natively in the IR instead of something
 that's tacked on or something that drivers have to do themselves. So
 no, I'm not re-inventing LLVM. On the other hand, it's entirely
 possible for backends to add their own backend-specific opcodes and
 intrinsics, and thereby be able to do some amount of lowering and
 optimization in NIR. Another reason that backends might want to accept
 NIR is so that they can give NIR passes more precise information on
 e.g. when to do if-conversion. Again, this is all speculative though -
 we'll have to do more of the work before we can find out how we want
 to use NIR beyond what I originally wrote it to be, which was a way to
 do common optimizations that we couldn't do in GLSL IR.
 
 Connor

I guess having the typical gpu features (vec4 representation along
with swizzles, writemasks, modifiers) in the IR is nice, though I'm
beginning to wonder if it's all that useful. Obviously, it maps really
well to old gpus (like r300) and old-style shaders using lots of vec4
(the human-readable assembly is going to be much nicer if you have vec4
support) but ultimately it seems most newer archs are scalar (or rather,
their vectors are not along the instruction level axis). Something like
Mali gpus being an exception rather than the norm. Even things like r600
need to do their own vliw-ication anyway.
In any case, for gallium I'm pretty indifferent to shader IR actually,
as long as things keep working... Just keep in mind though while tgsi
might not be the optimal solution, there's significant precedent for
this kind of low level shader language (since even d3d10 follows that
model, though I can't tell how happy the IHVs are with it...). But if
NIR benefits glsl compiler on its own that looks all good to me, it's
just an area I'm not really familiar with.

btw do you have some example of how a shader looks printed out? I'm too
lazy to play with it myself...

Roland

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 18/20] i965/fs: Preserve CFG in the SEL peephole.

2014-08-18 Thread Matt Turner
On Mon, Aug 18, 2014 at 8:34 AM, Pohjolainen, Topi
topi.pohjolai...@intel.com wrote:
 On Thu, Jul 24, 2014 at 07:54:25PM -0700, Matt Turner wrote:
 ---
  src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp | 15 +--
  1 file changed, 9 insertions(+), 6 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
 index d64cd98..f609138 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
 @@ -212,23 +212,26 @@ fs_visitor::opt_peephole_sel()
if (brw-gen == 6  if_inst-conditional_mod) {
   fs_inst *cmp_inst = CMP(reg_null_d, if_inst-src[0], 
 if_inst-src[1],
   if_inst-conditional_mod);
 - if_inst-insert_before(cmp_inst);
 + if_inst-insert_before(block, cmp_inst);
}

 +  bblock_t *then_block = (bblock_t *)block-link.next;
 +  bblock_t *else_block = (bblock_t *)block-else_block-link.next;

 Isn't this a pointer to the endif-block? I thought else-block would be

  bblock_t *else_block = (bblock_t *)block-then_block-link.next;

 or simply just

  bblock_t *else_block = (bblock_t *)block-else_block;

It's the block immediately following the ELSE instruction (containing
the MOVs). E.g.,

B0: ...
IF
B1: MOV
MOV
ELSE
B2: MOV
MOV
B3: ENDIF
...

then_block is B1, and else_block is B2. I can name them something else
if that would make it clearer.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/7] i965/gen8: Add 3-src instruction compaction tables.

2014-08-18 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_eu_compact.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index dc0060d..1f30366 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -461,6 +461,33 @@ static const uint16_t gen8_src_index_table[32] = {
0b010110001000
 };
 
+/* This is actually the control index table for Cherryview (26 bits), but the
+ * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
+ * the start.
+ *
+ * The low 24 bits have the same mappings on both hardware.
+ */
+static const uint32_t gen8_3src_control_index_table[4] = {
+   0b001111,
+   0b000111,
+   0b001001,
+   0b001011
+};
+
+/* This is actually the source index table for Cherryview (49 bits), but the
+ * only difference from Broadwell (46 bits) is that it has three extra 0-bits
+ * at the start.
+ *
+ * The low 44 bits have the same mappings on both hardware, and since the high
+ * three bits on Broadwell are zero, we can reuse Cherryview's table.
+ */
+static const uint64_t gen8_3src_source_index_table[4] = {
+   0b0011100100111001001110010,
+   0b00111001001110010011100100010,
+   0b00111001001110010011100101000,
+   0b00111001001110010011100100010
+};
+
 static const uint32_t *control_index_table;
 static const uint32_t *datatype_table;
 static const uint16_t *subreg_table;
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/7] i965/gen8: Add instruction compaction tables.

2014-08-18 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_eu_compact.c | 150 +
 1 file changed, 150 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index f100297..dc0060d 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -321,6 +321,146 @@ static const uint16_t gen7_src_index_table[32] = {
0b010110001000
 };
 
+static const uint32_t gen8_control_index_table[32] = {
+   0b010,
+   0b100,
+   0b101,
+   0b110,
+   0b111,
+   0b1000100,
+   0b1000101,
+   0b1000111,
+   0b1001000,
+   0b1001001,
+   0b1001101,
+   0b110,
+   0b111,
+   0b1100010,
+   0b1100011,
+   0b1100100,
+   0b1100101,
+   0b1100111,
+   0b1101001,
+   0b1101101,
+   0b111,
+   0b111,
+   0b0001000,
+   0b0001010,
+   0b0001100,
+   0b0001001,
+   0b0010110,
+   0b0010111,
+   0b0011000,
+   0b0011001,
+   0b0101000,
+   0b0101001
+};
+
+static const uint32_t gen8_datatype_table[32] = {
+   0b00101,
+   0b001000100,
+   0b001000101,
+   0b001001101,
+   0b0010101011101,
+   0b00100010111011101,
+   0b0010001110101,
+   0b00100011101000101,
+   0b00100011101011101,
+   0b001010101,
+   0b001110100,
+   0b001110101,
+   0b001000101000101000101,
+   0b001000111000101000100,
+   0b001000111000101000101,
+   0b001011100011101011101,
+   0b001011101011100011101,
+   0b001011101011101011100,
+   0b001011101011101011101,
+   0b00101011101011100,
+   0b001001100,
+   0b0010001011101,
+   0b0010101000101,
+   0b001010100,
+   0b001000101000101000100,
+   0b00100011100010100,
+   0b00100100100101001,
+   0b001010111011101011101,
+   0b00101011101011101,
+   0b00100001101001100,
+   0b001001001001001001000,
+   0b001001011001001001000
+};
+
+static const uint16_t gen8_subreg_table[32] = {
+   0b000,
+   0b001,
+   0b0001000,
+   0b000,
+   0b001,
+   0b0001000,
+   0b001,
+   0b0011000,
+   0b010,
+   0b011,
+   0b0101000,
+   0b001,
+   0b0010001,
+   0b0011001,
+   0b0011010,
+   0b0011011,
+   0b0011100,
+   0b0011111,
+   0b00110001000,
+   0b00110001110,
+   0b0011000,
+   0b00100011000,
+   0b00100001000,
+   0b010,
+   0b0111000,
+   0b011,
+   0b0001111,
+   0b100,
+   0b101,
+   0b110,
+   0b111,
+   0b11100011100
+};
+
+static const uint16_t gen8_src_index_table[32] = {
+   0b,
+   0b0010,
+   0b0001,
+   0b00010010,
+   0b00011000,
+   0b0010,
+   0b00101000,
+   0b01001000,
+   0b0101,
+   0b0111,
+   0b0000,
+   0b0011,
+   0b00110010,
+   0b00111000,
+   0b00110001,
+   0b001100010010,
+   0b00110010,
+   0b001100101000,
+   0b001100111000,
+   0b00110100,
+   0b00110110,
+   0b001101001000,
+   0b00110101,
+   0b00110110,
+   0b001101101000,
+   0b00110111,
+   0b001101110001,
+   0b00110000,
+   0b010001101000,
+   0b010001101001,
+   0b010001101010,
+   0b010110001000
+};
+
 static const uint32_t *control_index_table;
 static const uint32_t *datatype_table;
 static const uint16_t *subreg_table;
@@ -679,8 +819,18 @@ brw_init_compaction_tables(struct brw_context *brw)
assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
+   assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] 
!= 0);
+   assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
+   assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
+   assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
 
switch (brw-gen) {
+   case 8:
+  control_index_table = gen8_control_index_table;
+  datatype_table = gen8_datatype_table;
+  subreg_table = gen8_subreg_table;
+  src_index_table = gen8_src_index_table;
+  break;
case 7:
   control_index_table = gen7_control_index_table;
   datatype_table = gen7_datatype_table;
-- 
1.8.5.5

___
mesa-dev 

[Mesa-dev] [PATCH 7/7] i965: Enable instruction compaction on Gen8+.

2014-08-18 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_eu_compact.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index 727fef5..280d7f7 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -1101,7 +1101,7 @@ brw_compact_instructions(struct brw_compile *p, int 
start_offset,
 */
int old_ip[(p-next_insn_offset - start_offset) / 8];
 
-   if (brw-gen  6 || brw-gen = 8)
+   if (brw-gen  6 || brw-gen  8)
   return;
 
int src_offset;
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/7] i965: Broadwell/Cherryview instruction compaction

2014-08-18 Thread Matt Turner
Available from

   git://people.freedesktop.org/~mattst88/mesa gen8compact

(The branch is based on master before krh's fast clear series)

Cherryview's instruction compaction is slightly different, and is yet
untested.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/7] i965: Reverse condition ordering to let us support other gens.

2014-08-18 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_eu_compact.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index 625cfbb..25a96e7 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -789,14 +789,14 @@ brw_compact_instructions(struct brw_compile *p, int 
start_offset,
   case BRW_OPCODE_ELSE:
   case BRW_OPCODE_ENDIF:
   case BRW_OPCODE_WHILE:
- if (brw-gen == 6) {
+ if (brw-gen = 7) {
+update_uip_jip(brw, insn, this_old_ip, compacted_counts);
+ } else if (brw-gen == 6) {
 int gen6_jump_count = brw_inst_gen6_jump_count(brw, insn);
 target_old_ip = this_old_ip + gen6_jump_count;
 target_compacted_count = compacted_counts[target_old_ip];
 gen6_jump_count -= (target_compacted_count - this_compacted_count);
 brw_inst_set_gen6_jump_count(brw, insn, gen6_jump_count);
- } else {
-update_uip_jip(brw, insn, this_old_ip, compacted_counts);
  }
  break;
   }
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/7] i965: Add support for compacting 3-src instructions on Gen8.

2014-08-18 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_eu_compact.c | 189 +
 1 file changed, 189 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index 07faff4..727fef5 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -611,6 +611,97 @@ set_src1_index(struct brw_context *brw, brw_compact_inst 
*dst, brw_inst *src,
return true;
 }
 
+static bool
+set_3src_control_index(struct brw_context *brw, brw_compact_inst *dst, 
brw_inst *src)
+{
+   assert(brw-gen = 8);
+
+   uint32_t uncompacted =  /* 24b/BDW; 26b/CHV */
+  (brw_inst_bits(src, 34, 32)  21) | /*  3b */
+  (brw_inst_bits(src, 28,  8));/* 21b */
+
+   if (brw-is_cherryview)
+  uncompacted |= brw_inst_bits(src, 36, 35)  24; /* 2b */
+
+   for (int i = 0; i  4; i++) {
+  if (gen8_3src_control_index_table[i] == uncompacted) {
+ brw_compact_inst_set_3src_control_index(dst, i);
+return true;
+  }
+   }
+
+   return false;
+}
+
+static bool
+set_3src_source_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst 
*src)
+{
+   assert(brw-gen = 8);
+
+   uint64_t uncompacted =/* 46b/BDW; 49b/CHV */
+  (brw_inst_bits(src,  83,  83)  43) | /*  1b */
+  (brw_inst_bits(src, 114, 107)  35) | /*  8b */
+  (brw_inst_bits(src,  93,  86)  27) | /*  8b */
+  (brw_inst_bits(src,  72,  65)  19) | /*  8b */
+  (brw_inst_bits(src,  55,  37));/* 19b */
+
+   if (brw-is_cherryview) {
+  uncompacted |=
+ (brw_inst_bits(src, 126, 125)  47) | /* 2b */
+ (brw_inst_bits(src, 105, 104)  45) | /* 2b */
+ (brw_inst_bits(src,  84,  84)  44);  /* 1b */
+   } else {
+  uncompacted |=
+ (brw_inst_bits(src, 125, 125)  45) | /* 1b */
+ (brw_inst_bits(src, 104, 104)  44);  /* 1b */
+   }
+
+   for (int i = 0; i  4; i++) {
+  if (gen8_3src_source_index_table[i] == uncompacted) {
+ brw_compact_inst_set_3src_source_index(dst, i);
+return true;
+  }
+   }
+
+   return false;
+}
+
+static bool
+brw_try_compact_3src_instruction(struct brw_context *brw, brw_compact_inst 
*dst,
+ brw_inst *src)
+{
+   assert(brw-gen = 8);
+
+#define compact(field) \
+   brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(brw, src))
+
+   compact(opcode);
+
+   if (!set_3src_control_index(brw, dst, src))
+  return false;
+
+   if (!set_3src_source_index(brw, dst, src))
+  return false;
+
+   compact(dst_reg_nr);
+   compact(src0_rep_ctrl);
+   brw_compact_inst_set_3src_cmpt_control(dst, true);
+   compact(debug_control);
+   compact(saturate);
+   compact(src1_rep_ctrl);
+   compact(src2_rep_ctrl);
+   compact(src0_reg_nr);
+   compact(src1_reg_nr);
+   compact(src2_reg_nr);
+   compact(src0_subreg_nr);
+   compact(src1_subreg_nr);
+   compact(src2_subreg_nr);
+
+#undef compact
+
+   return true;
+}
+
 /* Compacted instructions have 12-bits for immediate sources, and a 13th bit
  * that's replicated through the high 20 bits.
  *
@@ -627,6 +718,17 @@ is_compactable_immediate(unsigned imm)
return imm == 0 || imm == 0xf000;
 }
 
+/* Returns whether an opcode takes three sources. */
+static bool
+is_3src(uint32_t op)
+{
+   return op == BRW_OPCODE_CSEL ||
+  op == BRW_OPCODE_BFE ||
+  op == BRW_OPCODE_BFI2 ||
+  op == BRW_OPCODE_MAD ||
+  op == BRW_OPCODE_LRP;
+}
+
 /**
  * Tries to compact instruction src into dst.
  *
@@ -651,6 +753,16 @@ brw_try_compact_instruction(struct brw_context *brw, 
brw_compact_inst *dst,
   return false;
}
 
+   if (brw-gen = 8  is_3src(brw_inst_opcode(brw, src))) {
+  memset(temp, 0, sizeof(temp));
+  if (brw_try_compact_3src_instruction(brw, temp, src)) {
+ *dst = temp;
+ return true;
+  } else {
+ return false;
+  }
+   }
+
bool is_immediate =
   brw_inst_src0_reg_file(brw, src) == BRW_IMMEDIATE_VALUE ||
   brw_inst_src1_reg_file(brw, src) == BRW_IMMEDIATE_VALUE;
@@ -767,12 +879,89 @@ set_uncompacted_src1(struct brw_context *brw, brw_inst 
*dst,
}
 }
 
+static void
+set_uncompacted_3src_control_index(struct brw_context *brw, brw_inst *dst,
+   brw_compact_inst *src)
+{
+   assert(brw-gen = 8);
+
+   uint32_t compacted = brw_compact_inst_3src_control_index(src);
+   uint32_t uncompacted = gen8_3src_control_index_table[compacted];
+
+   brw_inst_set_bits(dst, 34, 32, (uncompacted  21)  0x7);
+   brw_inst_set_bits(dst, 28,  8, (uncompacted   0)  0x1f);
+
+   if (brw-is_cherryview)
+  brw_inst_set_bits(dst, 36, 35, (uncompacted  24));
+}
+
+static void
+set_uncompacted_3src_source_index(struct brw_context *brw, brw_inst *dst,
+  brw_compact_inst *src)
+{
+   assert(brw-gen = 8);
+
+   uint32_t compacted = brw_compact_inst_3src_source_index(src);
+   

[Mesa-dev] [PATCH 5/7] i965: Add support for compacting 1- and 2-src instructions on Gen8.

2014-08-18 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_eu_compact.c | 48 ++
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index 1f30366..07faff4 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -496,14 +496,19 @@ static const uint16_t *src_index_table;
 static bool
 set_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst 
*src)
 {
-   uint32_t uncompacted =  /* 17b/SNB; 19b/IVB+ */
-  (brw_inst_bits(src, 31, 31)  16) | /* 1b */
-  (brw_inst_bits(src, 23,  8));/* 16b */
+   uint32_t uncompacted = brw-gen = 8  /* 17b/SNB; 19b/IVB+ */
+  ? (brw_inst_bits(src, 33, 31)  16) | /*  3b */
+(brw_inst_bits(src, 23, 12)   4) | /* 12b */
+(brw_inst_bits(src, 10,  9)   2) | /*  2b */
+(brw_inst_bits(src, 34, 34)   1) | /*  1b */
+(brw_inst_bits(src,  8,  8)) /*  1b */
+  : (brw_inst_bits(src, 31, 31)  16) | /*  1b */
+(brw_inst_bits(src, 23,  8));/* 16b */
 
/* On gen7, the flag register and subregister numbers are integrated into
 * the control index.
 */
-   if (brw-gen = 7)
+   if (brw-gen == 7)
   uncompacted |= brw_inst_bits(src, 90, 89)  17; /* 2b */
 
for (int i = 0; i  32; i++) {
@@ -520,9 +525,12 @@ static bool
 set_datatype_index(struct brw_context *brw, brw_compact_inst *dst,
brw_inst *src)
 {
-   uint32_t uncompacted =  /* 18b */
-  (brw_inst_bits(src, 63, 61)  15) | /* 3b */
-  (brw_inst_bits(src, 46, 32));/* 15b */
+   uint32_t uncompacted = brw-gen = 8  /* 18b/SNB+; 21b/BDW+ */
+  ? (brw_inst_bits(src, 63, 61)  18) | /*  3b */
+(brw_inst_bits(src, 94, 89)  12) | /*  6b */
+(brw_inst_bits(src, 46, 35)) /* 12b */
+  : (brw_inst_bits(src, 63, 61)  15) | /*  3b */
+(brw_inst_bits(src, 46, 32));/* 15b */
 
for (int i = 0; i  32; i++) {
   if (datatype_table[i] == uncompacted) {
@@ -692,11 +700,19 @@ set_uncompacted_control(struct brw_context *brw, brw_inst 
*dst,
uint32_t uncompacted =
   control_index_table[brw_compact_inst_control_index(src)];
 
-   brw_inst_set_bits(dst, 31, 31, (uncompacted  16)  0x1);
-   brw_inst_set_bits(dst, 23,  8, (uncompacted  0x));
+   if (brw-gen = 8) {
+  brw_inst_set_bits(dst, 33, 31, (uncompacted  16));
+  brw_inst_set_bits(dst, 23, 12, (uncompacted   4)  0xfff);
+  brw_inst_set_bits(dst, 10,  9, (uncompacted   2)  0x3);
+  brw_inst_set_bits(dst, 34, 34, (uncompacted   1)  0x1);
+  brw_inst_set_bits(dst,  8,  8, (uncompacted   0)  0x1);
+   } else {
+  brw_inst_set_bits(dst, 31, 31, (uncompacted  16)  0x1);
+  brw_inst_set_bits(dst, 23,  8, (uncompacted  0x));
 
-   if (brw-gen = 7)
-  brw_inst_set_bits(dst, 90, 89, uncompacted  17);
+  if (brw-gen == 7)
+ brw_inst_set_bits(dst, 90, 89, uncompacted  17);
+   }
 }
 
 static void
@@ -705,8 +721,14 @@ set_uncompacted_datatype(struct brw_context *brw, brw_inst 
*dst,
 {
uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)];
 
-   brw_inst_set_bits(dst, 63, 61, (uncompacted  15));
-   brw_inst_set_bits(dst, 46, 32, (uncompacted  0x7fff));
+   if (brw-gen = 8) {
+  brw_inst_set_bits(dst, 63, 61, (uncompacted  18));
+  brw_inst_set_bits(dst, 94, 89, (uncompacted  12)  0x3f);
+  brw_inst_set_bits(dst, 46, 35, (uncompacted   0)  0xfff);
+   } else {
+  brw_inst_set_bits(dst, 63, 61, (uncompacted  15));
+  brw_inst_set_bits(dst, 46, 32, (uncompacted  0x7fff));
+   }
 }
 
 static void
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/7] i965: Update JIP/UIP compaction code to operate on bytes.

2014-08-18 Thread Matt Turner
JIP/UIP were previously in units of compacted instructions. On Gen8
they're in units of bytes.
---
 src/mesa/drivers/dri/i965/brw_eu_compact.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index 25a96e7..f100297 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -653,17 +653,19 @@ static void
 update_uip_jip(struct brw_context *brw, brw_inst *insn,
int this_old_ip, int *compacted_counts)
 {
-   int jip = brw_inst_jip(brw, insn);
+   int scale = brw-gen = 8 ? sizeof(brw_compact_inst) : 1;
+
+   int32_t jip = brw_inst_jip(brw, insn) / scale;
jip -= compacted_between(this_old_ip, this_old_ip + jip, compacted_counts);
-   brw_inst_set_jip(brw, insn, jip);
+   brw_inst_set_jip(brw, insn, jip * scale);
 
if (brw_inst_opcode(brw, insn) == BRW_OPCODE_ENDIF ||
brw_inst_opcode(brw, insn) == BRW_OPCODE_WHILE)
   return;
 
-   int uip = brw_inst_uip(brw, insn);
+   int32_t uip = brw_inst_uip(brw, insn) / scale;
uip -= compacted_between(this_old_ip, this_old_ip + uip, compacted_counts);
-   brw_inst_set_uip(brw, insn, uip);
+   brw_inst_set_uip(brw, insn, uip * scale);
 }
 
 void
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glcpp: Don't use alternation in the lookahead for empty pragmas.

2014-08-18 Thread Carl Worth
We've found that there's a buffer overrun bug in flex that's triggered by
using alternation in a lookahead pattern.

Fortunately, we don't need to match the exact {NEWLINE} expression to detect
an empty pragma. It suffices to verify that there are no non-space characters
before any newline character. So we can use a simple [\r\n] to get the desired
behavior while avoiding the flex bug.

Fixes Piglit's 16385-consecutive-chars and
17000-consecutive-chars-identifier tests.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82472
Signed-off-by: Carl Worth cwo...@cworth.org
Approach-suggested-by: Kenneth Graunke kenn...@whitecape.org
CC: Kenneth Graunke kenn...@whitecape.org
---

 Thanks for chasing down the fix for this regression of mine, Ken.

 I am embarrassed that I clearly didn't run piglit enough while testing my
 original branch.

 With your fix above, there is some state that's not updated as it should
 be when returning a NEWLINE token, (such as incrementing yylineno, etc.).
 I tried to improve things to update all that state, but it proved
 problematic, (putting the state updates in a common function doesn't
 work because only the outer lexing function has access to local variables
 like yylineno).

 The alternate approach here was your recommendation, of course.

 src/glsl/glcpp/glcpp-lex.l | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/glsl/glcpp/glcpp-lex.l b/src/glsl/glcpp/glcpp-lex.l
index 98d500e..aaef7b8 100644
--- a/src/glsl/glcpp/glcpp-lex.l
+++ b/src/glsl/glcpp/glcpp-lex.l
@@ -289,8 +289,14 @@ HEXADECIMAL_INTEGER0[xX][0-9a-fA-F]+[uU]?
 }
 
/* Swallow empty #pragma directives, (to avoid confusing the
-* downstream compiler). */
-HASHpragma{HSPACE}*/{NEWLINE} {
+* downstream compiler).
+*
+* Note: We use a simple regular expression for the lookahead
+* here. Specifically, we cannot use the complete {NEWLINE} expression
+* since it uses alternation and we've found that there's a flex bug
+* where using alternation in the lookahead portion of a pattern
+* triggers a buffer overrun.  /
+HASHpragma{HSPACE}*/[\r\n] {
BEGIN INITIAL;
 }
 
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa

2014-08-18 Thread Thomas Helland
Hi Connor!

I've been scrolling through your GitHub repo a bit over the last few weeks,
and I have to say, this seems quite promising.

I've got some questions that I haven't really been able to answer
myself with the quick glimpse I've had over the codebase:

Since we're, in large part, making a mathematical graph-rewriting
simplifier-thingy just as much as a compiler, does the IR as of
now have an easy way of storing upper and lower bounds of variables?

Also, does it have an easy way to get something like the
hierarchical visitor we have in GLSL IR?
(A way of doing, say, algebraic optimizations the way we do now?)

With these two in place, it would be easy to make a general bounds-checking
optimization to eliminate max/min/sin/sign/cos/ etc operations.
I believe that we, as of now, do not have such a pass.

If this IR lands, I could probably find some time to port some
of the optimization passes from GLSL IR to NIR.

Regards,
Thomas

2014-08-16 2:12 GMT+02:00 Connor Abbott cwabbo...@gmail.com:
 I know what you might be thinking right now. Wait, *another* IR? Don't
 we already have like 5 of those, not counting all the driver-specific
 ones? Isn't this stuff complicated enough already? Well, there are some
 pretty good reasons to start afresh (again...). In the years we've been
 using GLSL IR, we've come to realize that, in fact, it's not what we
 want *at all* to do optimizations on. Ian has done a talk at FOSDEM that
 highlights some of the problems they've run into:

 https://video.fosdem.org/2014/H1301_Cornil/Saturday/Three_Years_Experience_with_a_Treelike_Shader_IR.webm

 But here's the summary:

 * GLSL IR is way too much of a memory hog, since it has to make a new
 variable for each temporary the compiler creates and then each time you
 want to dereference that temporary you need to create an
 ir_dereference_variable that points to it which is also very
 cache-unfriendly (downright cache-mean!).

 * The expression trees were originally added so that we could do
 pattern matching to automatically optimize things, but this turned out
 to be both very difficult to do and not very helpful. Instead, all it
 does is add more complexity to the IR without much benefit - with SSA or
 having proper use-def chains, we could get back what the trees give us
 while also being able to do lots more optimizations.

 * We don't have the concept of basic blocks in GLSL IR, which makes a
 lot of optimizations harder because they were originally designed with
 basic blocks in mind - take, for example, my SSA series. I had to map a
 whole lot of concepts that were based on the control flow graph to this
 tree of statements that GLSL IR uses, and the end result wound up
 looking nothing at all like the original paper. This problem gets even
 worse for things like e.g. Global Code Motion that depend upon having
 the dominance tree.

 I originally wanted to modify GLSL IR to fix these problems by adding
 new instruction types that would address these issues and then
 converting back and forth between the old and the new form, but I
 realized that fixing all the problems would basically mean a complete
 rewrite - and if that's the case, then why don't we start from scratch?
 So I took Ken's suggestions and started designing, and then at Intel
 over the summer started implementing, a completely new IR which I call
 NIR that's at a lower level than GLSL IR, but still high-level enough to
 be mostly device-independent (different drivers may have different
 passes and different ways of lowering e.g.  matrix multiplies) so that
 we can do generic optimizations on it. Having support for SSA from the
 beginning was also a must, because lots of optimisations that we really
 want for cleaning up DX9-translated games are either a lot easier in or
 made possible by SSA. I also made the decision for it to be typeless,
 because that's what the cool kids are all doing :) and for a
 lower-level, flat IR it seemed like the thing to do (it could have gone
 either way, though). So the key design points of NIR (pronounced either
 like "near" as in "NIR is near!" or to rhyme with "burr") are:

 * It's flat (no expression trees)

 * It's typeless

 * Modifiers (abs, negate, saturate), swizzles, and write masks are part
 of ALU instructions

 * It includes enough GLSL-like things (variables that you can load from
 or store to, function calls) to be hardware-agnostic (although we don't
 have a way to represent matrix multiplies right now, but that could
 easily be added) to be able to do optimizations at a high level, while
 having lowering passes that convert variables to registers and
 input/output/uniform loads/stores that will open up more opportunities
 for optimization and save memory while being more hardware-specific.

 * Control flow consists of a tree of if statements and loops, like in
 GLSL IR, except the leaves of the tree are now basic blocks instead of
 instructions. Also, each basic block keeps track of its successors and
 predecessors, so the 

[Mesa-dev] [PATCH 2/2] i965/vec4: Allow reswizzling writemasks when swizzle is single-valued.

2014-08-18 Thread Matt Turner
total instructions in shared programs: 4288033 - 4266151 (-0.51%)
instructions in affected programs: 930915 - 909033 (-2.35%)
---
View under git show -w. Really just rearranging code.

 src/mesa/drivers/dri/i965/brw_vec4.cpp | 60 +++---
 1 file changed, 33 insertions(+), 27 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index c1363ca..155016d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -946,24 +946,27 @@ vec4_instruction::can_reswizzle_dst(int dst_writemask,
   return false;
 
switch (opcode) {
-   case BRW_OPCODE_DP4:
-   case BRW_OPCODE_DP3:
-   case BRW_OPCODE_DP2:
-  return true;
default:
-  /* Check if there happens to be no reswizzling required. */
-  for (int c = 0; c  4; c++) {
- int bit = 1  BRW_GET_SWZ(swizzle, c);
- /* Skip components of the swizzle not used by the dst. */
- if (!(dst_writemask  (1  c)))
-continue;
+  if (!brw_is_single_value_swizzle(swizzle)) {
+ /* Check if there happens to be no reswizzling required. */
+ for (int c = 0; c  4; c++) {
+int bit = 1  BRW_GET_SWZ(swizzle, c);
+/* Skip components of the swizzle not used by the dst. */
+if (!(dst_writemask  (1  c)))
+   continue;
 
- /* We don't do the reswizzling yet, so just sanity check that we
-  * don't have to.
-  */
- if (bit != (1  c))
-return false;
+/* We don't do the reswizzling yet, so just sanity check that we
+ * don't have to.
+ */
+if (bit != (1  c))
+   return false;
+ }
+ return true;
   }
+  /* fallthrough */
+   case BRW_OPCODE_DP4:
+   case BRW_OPCODE_DP3:
+   case BRW_OPCODE_DP2:
   return true;
}
 }
@@ -981,6 +984,21 @@ vec4_instruction::reswizzle_dst(int dst_writemask, int 
swizzle)
int new_writemask = 0;
 
switch (opcode) {
+   default:
+  if (!brw_is_single_value_swizzle(swizzle)) {
+ for (int c = 0; c  4; c++) {
+/* Skip components of the swizzle not used by the dst. */
+if (!(dst_writemask  (1  c)))
+   continue;
+
+/* We don't do the reswizzling yet, so just sanity check that we
+ * don't have to.
+ */
+assert((1  BRW_GET_SWZ(swizzle, c)) == (1  c));
+ }
+ break;
+  }
+  /* fallthrough */
case BRW_OPCODE_DP4:
case BRW_OPCODE_DP3:
case BRW_OPCODE_DP2:
@@ -997,18 +1015,6 @@ vec4_instruction::reswizzle_dst(int dst_writemask, int 
swizzle)
   }
   dst.writemask = new_writemask;
   break;
-   default:
-  for (int c = 0; c  4; c++) {
- /* Skip components of the swizzle not used by the dst. */
- if (!(dst_writemask  (1  c)))
-continue;
-
- /* We don't do the reswizzling yet, so just sanity check that we
-  * don't have to.
-  */
- assert((1  BRW_GET_SWZ(swizzle, c)) == (1  c));
-  }
-  break;
}
 }
 
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965/vec4: Add a pass to reduce swizzles.

2014-08-18 Thread Matt Turner
total instructions in shared programs: 4344280 - 4288033 (-1.29%)
instructions in affected programs: 397468 - 341221 (-14.15%)
---
Suggestions for a better name are welcome.

 src/mesa/drivers/dri/i965/brw_vec4.cpp | 98 ++
 src/mesa/drivers/dri/i965/brw_vec4.h   |  1 +
 2 files changed, 99 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 5d4a92c..c1363ca 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -311,6 +311,103 @@ src_reg::equals(const src_reg r) const
  sizeof(fixed_hw_reg)) == 0);
 }
 
+/* Replaces unused channels of a swizzle with channels that are used.
+ *
+ * For instance, this pass transforms
+ *
+ *mov vgrf4.yz, vgrf5.wxzy
+ *
+ * into
+ *
+ *mov vgrf4.yz, vgrf5.xxzx
+ *
+ * This eliminates false uses of some channels, letting dead code elimination
+ * remove the instructions that wrote them.
+ */
+bool
+vec4_visitor::opt_reduce_swizzle()
+{
+   bool progress = false;
+
+   foreach_in_list_safe(vec4_instruction, inst, instructions) {
+  if (inst-dst.file == BAD_FILE || inst-dst.file == HW_REG)
+ continue;
+
+  int swizzle[4];
+
+  /* Determine which channels of the sources are read. */
+  switch (inst-opcode) {
+  case BRW_OPCODE_DP4:
+  case BRW_OPCODE_DPH: /* FINISHME: DPH reads only three channels of src0,
+*   but all four of src1.
+*/
+ swizzle[0] = 0;
+ swizzle[1] = 1;
+ swizzle[2] = 2;
+ swizzle[3] = 3;
+ break;
+  case BRW_OPCODE_DP3:
+ swizzle[0] = 0;
+ swizzle[1] = 1;
+ swizzle[2] = 2;
+ swizzle[3] = -1;
+ break;
+  case BRW_OPCODE_DP2:
+ swizzle[0] = 0;
+ swizzle[1] = 1;
+ swizzle[2] = -1;
+ swizzle[3] = -1;
+ break;
+  default:
+ swizzle[0] = inst-dst.writemask  WRITEMASK_X ? 0 : -1;
+ swizzle[1] = inst-dst.writemask  WRITEMASK_Y ? 1 : -1;
+ swizzle[2] = inst-dst.writemask  WRITEMASK_Z ? 2 : -1;
+ swizzle[3] = inst-dst.writemask  WRITEMASK_W ? 3 : -1;
+ break;
+  }
+
+  /* Resolve unread channels (-1) by assigning them the swizzle of the
+   * first channel that is used.
+   */
+  int chosen = 0;
+  for (int i = 0; i  4; i++) {
+ if (swizzle[i] != -1) {
+chosen = swizzle[i];
+break;
+ }
+  }
+  for (int i = 0; i  4; i++) {
+ if (swizzle[i] == -1) {
+swizzle[i] = chosen;
+ }
+  }
+
+  /* Update sources' swizzles. */
+  for (int i = 0; i  3; i++) {
+ if (inst-src[i].file != GRF 
+ inst-src[i].file != ATTR 
+ inst-src[i].file != UNIFORM)
+continue;
+
+ int swiz[4];
+ for (int j = 0; j  4; j++) {
+swiz[j] = BRW_GET_SWZ(inst-src[i].swizzle, swizzle[j]);
+ }
+
+ unsigned new_swizzle = BRW_SWIZZLE4(swiz[0], swiz[1], swiz[2], 
swiz[3]);
+ if (inst-src[i].swizzle != new_swizzle) {
+inst-src[i].swizzle = new_swizzle;
+progress = true;
+ }
+  }
+   }
+
+   if (progress)
+  invalidate_live_intervals();
+
+   return progress;
+}
+
 static bool
 try_eliminate_instruction(vec4_instruction *inst, int new_writemask,
   const struct brw_context *brw)
@@ -1701,6 +1798,7 @@ vec4_visitor::run()
   iteration++;
   int pass_num = 0;
 
+  OPT(opt_reduce_swizzle);
   OPT(dead_code_eliminate);
   OPT(dead_control_flow_eliminate, this);
   OPT(opt_copy_propagation);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index c59d24f..f009dd2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -380,6 +380,7 @@ public:
void calculate_live_intervals();
void invalidate_live_intervals();
void split_virtual_grfs();
+   bool opt_reduce_swizzle();
bool dead_code_eliminate();
bool virtual_grf_interferes(int a, int b);
bool opt_copy_propagation();
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/19] auxiliary/os: introduce os_get_total_physical_memory helper function

2014-08-18 Thread Alexander von Gluck IV

On , Emil Velikov wrote:

Cc: Alexander von Gluck IV kallis...@unixzen.com
Signed-off-by: Emil Velikov emil.l.veli...@gmail.com
---
 src/gallium/auxiliary/os/os_misc.c | 64 
++

 src/gallium/auxiliary/os/os_misc.h |  7 +
 2 files changed, 71 insertions(+)


The Haiku portion of this patch looks good btw.  I'll do a test build 
shortly.


Thanks for cc'ing me :-)

 -- Alex
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] Fix surf-bankh init by default value when surf-tile_split == 0

2014-08-18 Thread Maks Naumov
Signed-off-by: Maks Naumov maksq...@ukr.net
---
 radeon/radeon_surface.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/radeon/radeon_surface.c b/radeon/radeon_surface.c
index e056ed4..40a544a 100644
--- a/radeon/radeon_surface.c
+++ b/radeon/radeon_surface.c
@@ -1311,7 +1311,7 @@ static int si_surface_sanity(struct 
radeon_surface_manager *surf_man,
 /* default value */
 surf-mtilea = 1;
 surf-bankw = 1;
-surf-bankw = 1;
+surf-bankh = 1;
 surf-tile_split = 64;
 surf-stencil_tile_split = 64;
 }
@@ -2138,7 +2138,7 @@ static int cik_surface_sanity(struct 
radeon_surface_manager *surf_man,
 /* default value */
 surf-mtilea = 1;
 surf-bankw = 1;
-surf-bankw = 1;
+surf-bankh = 1;
 surf-tile_split = 64;
 surf-stencil_tile_split = 64;
 }
-- 
1.9.1



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa

2014-08-18 Thread Connor Abbott
On Mon, Aug 18, 2014 at 12:20 PM, Thomas Helland
thomashellan...@gmail.com wrote:
 Hi Connor!

 I've been scrolling through your github-repo a bit the latest weeks,
 and I have to say, this seems quite promising.

 I've got some questions that I haven't really been able to answer
 myself with the quick glimpse I've had over the codebase:

 Since we're in large making a mathematical graph rewriting
 simplifier-thingy just as much as a compiler, does the IR as of
 now have an easy way of storing upper and lower bounds of variables?

NIR as it stands doesn't have a way of storing upper and lower bounds
of registers/SSA values (variables aren't used for computation in
NIR), but it would be easy to do for an analysis pass - SSA values are
indexed, so just put them in an array.


 Also, does it have an easy way to get something like the
 hierarchical visitor we have in GLSL IR?
 (A way of doing, say, algebraic optimizations the way we do now?)

We don't have something like a hierarchical visitor in NIR, because it
isn't necessary any more - certainly one could be created, though.

Like I mentioned in the cover letter, we need one of two things to get
the information we got with the expression trees (and actually even
more):

1. Use-def chains (which definitions of this register can possibly
reach this use?) and def-use chains (of all the uses of this register,
which ones can be reached by this definition?)

2. SSA

With SSA, use-def chains and def-use chains are trivial because each
SSA value is defined only once: the use-def chain for each use is just
the one definition, and the def-use chain for each definition is just
the set of all uses, which we already keep track of. You can think of
expression trees as a special case of SSA, where each definition has
only one use.

I think the plan for NIR is to just do all our optimizations in SSA,
so we don't have to mess around with DU and UD chains at all.

One of my pie-in-the-sky ideas is make a language for doing
graph-rewriting where we can say things like "a * 1.0 => a" similar to
what LLVM has now, except it might get difficult with all the
swizzles, modifiers, etc. that NIR supports.


 With these two in place, it would be easy to make a general bounds-checking
 optimization to eliminate max/min/sin/sign/cos/ etc operations.
 I believe that we, as of now, do not have such a pass.

Well, I think Petri has made a pass like that for GLSL IR, you may
want to check it out - right now it only handles mins and maxes but
you should be able to extend it to other things as well.

Here's a sketch of how a bounds analysis pass for NIR in SSA would
probably work (just making this up now, I haven't worked out the
details):

- Create an array that for each SSA value gives its range,
initializing it to (-infinity, infinity) for each value except for
ones defined by load_const instructions, the output of sin and cos
instructions, etc.
- Create a worklist of SSA values, initially putting in it only the
values that we didn't initialize to (-infinity, infinity).
- While the worklist isn't empty:
- Grab a value off the worklist
- For each use of the value that is an ALU instruction:
- Re-evaluate the bounds of the value the instruction defines
- If the bound is now tighter and the value isn't already on
the worklist, then put it in the worklist

(note, this will probably work similarly for lots of other analysis
passes, so it might be a good idea to abstract some of it out)

Then, once you have the results of the analysis, you can do things
like replacing all the uses of a max/min instruction with one of its
inputs, etc.


 If this IR lands, I could probably find some time to port some
 of the optimization-passes from GLSL IR to NIR.

That would be cool. Once this stuff gets actually implemented, there's
probably going to be a lot of low-hanging fruit when it comes to
optimizations, especially since writing optimizations in SSA is so
easy!

Connor


 Regards,
 Thomas

 2014-08-16 2:12 GMT+02:00 Connor Abbott cwabbo...@gmail.com:
 I know what you might be thinking right now. Wait, *another* IR? Don't
 we already have like 5 of those, not counting all the driver-specific
 ones? Isn't this stuff complicated enough already? Well, there are some
 pretty good reasons to start afresh (again...). In the years we've been
 using GLSL IR, we've come to realize that, in fact, it's not what we
 want *at all* to do optimizations on. Ian has done a talk at FOSDEM that
 highlights some of the problems they've run into:

 https://video.fosdem.org/2014/H1301_Cornil/Saturday/Three_Years_Experience_with_a_Treelike_Shader_IR.webm

 But here's the summary:

 * GLSL IR is way too much of a memory hog, since it has to make a new
 variable for each temporary the compiler creates and then each time you
 want to dereference that temporary you need to create an
 ir_dereference_variable that points to it which is also very
 cache-unfriendly (downright cache-mean!).

 * The expression 

Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa

2014-08-18 Thread Alex Deucher
On Mon, Aug 18, 2014 at 1:38 PM, Roland Scheidegger srol...@vmware.com wrote:
 Am 18.08.2014 19:05, schrieb Connor Abbott:
 On Mon, Aug 18, 2014 at 12:38 PM, Ilia Mirkin imir...@alum.mit.edu wrote:
 On Mon, Aug 18, 2014 at 12:25 PM, Connor Abbott cwabbo...@gmail.com wrote:
 On Mon, Aug 18, 2014 at 11:47 AM, Jose Fonseca jfons...@vmware.com wrote:
 On 18/08/14 14:21, Marek Olšák wrote:
 Once these are in place, all development effort to go on to
 improving/leveraging the new IR.  We could deprecate TGSI when it would 
 have
 few users.

 Also, switching to LLVM, NIR, or some other IR that uses SSA (or at
 least modifying TGSI to support it) seems like something that's really
 necessary for the Gallium folks. Soon, considering most backends
 already use SSA in one form or another, the situation will look like:

 GLSL IR -> NIR -> NIR with SSA -> optimizations -> NIR without SSA ->
 TGSI -> backend without SSA -> backend with SSA

 So backends would have to duplicate the into-SSA logic and every
 shader would have to pay the penalty of being converted out of and
 then back into SSA thanks to TGSI not supporting it.

 Looking at it another way, perhaps we should just accept that backends
 will want to do their own things, and try to minimize the damage by
 doing

 GLSL IR -> transport ir -> backend

 Are you envisioning a world where every backend uses NIR, and uses
 some sort of shared register allocation/spilling/etc logic,
 configurable instruction lists, pluggable with lowering passes? By
 then you've invented LLVM...

   -ilia

 No, I expect that backends will still want to do their own register
 allocation/spilling/scheduling etc. - and besides for that, NIR
 supports structured control flow, swizzles and writemasks, modifiers
 (abs, negate, saturate), etc. natively in the IR instead of something
 that's tacked on or something that drivers have to do themselves. So
 no, I'm not re-inventing LLVM. On the other hand, it's entirely
 possible for backends to add their own backend-specific opcodes and
 intrinsics, and thereby be able to do some amount of lowering and
 optimization in NIR. Another reason that backends might want to accept
 NIR is so that they can give NIR passes more precise information on
 e.g. when to do if-conversion. Again, this is all speculative though -
 we'll have to do more of the work before we can find out how we want
 to use NIR beyond what we originally wrote it to be, which was a way to
 do common optimizations that we couldn't do in GLSL IR.

 Connor

 I guess having the typical gpu features (vec4 representation along
 with swizzles, writemasks, modifiers) in the IR is nice, though I'm
 beginning to wonder if it's all that useful. Obviously, it maps really
 well to old gpus (like r300) and old-style shaders using lots of vec4
 (the human-readable assembly is going to be much nicer if you have vec4
 support) but ultimately it seems most newer archs are scalar (or rather,
 their vectors are not along the instruction level axis). Something like
 Mali gpus being an exception rather than the norm. Even things like r600
 need to do their own vliw-ication anyway.
 In any case, for gallium I'm pretty indifferent to shader IR actually,
 as long as things keep working... Just keep in mind though while tgsi
 might not be the optimal solution, there's significant precedent for
 this kind of low level shader language (since even d3d10 follows that
 model, though I can't tell how happy the IHVs are with it...). But if
 NIR benefits glsl compiler on its own that looks all good to me, it's
 just an area I'm not really familiar with.

FWIW, we use AMDIL internally:
http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/AMD_Intermediate_Language_(IL)_Specification_v2.pdf
http://developer.amd.com/tools-and-sdks/opencl-zone/amd-accelerated-parallel-processing-app-sdk/documentation/amd-app-documentation/

Alex


 btw do you have some example of how a shader looks printed out? I'm too
 lazy to play with it myself...

 Roland

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Fix surf-bankh init by default value when surf-tile_split == 0

2014-08-18 Thread Marek Olšák
Reviewed-by: Marek Olšák marek.ol...@amd.com

Do you have commit access?

Marek

On Mon, Aug 18, 2014 at 9:59 PM, Maks Naumov maksq...@ukr.net wrote:
 Signed-off-by: Maks Naumov maksq...@ukr.net
 ---
  radeon/radeon_surface.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

 diff --git a/radeon/radeon_surface.c b/radeon/radeon_surface.c
 index e056ed4..40a544a 100644
 --- a/radeon/radeon_surface.c
 +++ b/radeon/radeon_surface.c
 @@ -1311,7 +1311,7 @@ static int si_surface_sanity(struct 
 radeon_surface_manager *surf_man,
  /* default value */
  surf-mtilea = 1;
  surf-bankw = 1;
 -surf-bankw = 1;
 +surf-bankh = 1;
  surf-tile_split = 64;
  surf-stencil_tile_split = 64;
  }
 @@ -2138,7 +2138,7 @@ static int cik_surface_sanity(struct 
 radeon_surface_manager *surf_man,
  /* default value */
  surf-mtilea = 1;
  surf-bankw = 1;
 -surf-bankw = 1;
 +surf-bankh = 1;
  surf-tile_split = 64;
  surf-stencil_tile_split = 64;
  }
 --
 1.9.1



 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Fix surf-bankh init by default value when surf-tile_split == 0

2014-08-18 Thread Maks Naumov
I don't have access to git.

 Reviewed-by: Marek Olšák marek.ol...@amd.com
 
 Do you have commit access?
 
 Marek
 
 On Mon, Aug 18, 2014 at 9:59 PM, Maks Naumov maksq...@ukr.net wrote:
  Signed-off-by: Maks Naumov maksq...@ukr.net
  ---
  radeon/radeon_surface.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)
 
  diff --git a/radeon/radeon_surface.c b/radeon/radeon_surface.c
  index e056ed4..40a544a 100644
  --- a/radeon/radeon_surface.c
  +++ b/radeon/radeon_surface.c
  @@ -1311,7 +1311,7 @@ static int si_surface_sanity(struct 
  radeon_surface_manager *surf_man,
  /* default value */
  surf-mtilea = 1;
  surf-bankw = 1;
  - surf-bankw = 1;
  + surf-bankh = 1;
  surf-tile_split = 64;
  surf-stencil_tile_split = 64;
  }
  @@ -2138,7 +2138,7 @@ static int cik_surface_sanity(struct 
  radeon_surface_manager *surf_man,
  /* default value */
  surf-mtilea = 1;
  surf-bankw = 1;
  - surf-bankw = 1;
  + surf-bankh = 1;
  surf-tile_split = 64;
  surf-stencil_tile_split = 64;
  }
  --
  1.9.1
 
 
 
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] rbug: fix a crash in sampler_view_destroy caused by incorrect context

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/rbug/rbug_objects.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/rbug/rbug_objects.c 
b/src/gallium/drivers/rbug/rbug_objects.c
index c64b14c..2d80164 100644
--- a/src/gallium/drivers/rbug/rbug_objects.c
+++ b/src/gallium/drivers/rbug/rbug_objects.c
@@ -137,7 +137,7 @@ rbug_sampler_view_create(struct rbug_context *rb_context,
rb_view-base.reference.count = 1;
rb_view-base.texture = NULL;
pipe_resource_reference(rb_view-base.texture, rb_resource-base);
-   rb_view-base.context = rb_context-pipe;
+   rb_view-base.context = rb_context-base;
rb_view-sampler_view = view;
 
return rb_view-base;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] rbug: only add textures to the list

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

rbug-gui cannot display buffers, so it's pointless to add them.
---
 src/gallium/drivers/rbug/rbug_objects.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/rbug/rbug_objects.c 
b/src/gallium/drivers/rbug/rbug_objects.c
index 2d80164..db18f2e 100644
--- a/src/gallium/drivers/rbug/rbug_objects.c
+++ b/src/gallium/drivers/rbug/rbug_objects.c
@@ -58,7 +58,8 @@ rbug_resource_create(struct rbug_screen *rb_screen,
rb_resource-base.screen = rb_screen-base;
rb_resource-resource = resource;
 
-   rbug_screen_add_to_list(rb_screen, resources, rb_resource);
+   if (resource-target != PIPE_BUFFER)
+  rbug_screen_add_to_list(rb_screen, resources, rb_resource);
 
return rb_resource-base;
 
@@ -71,7 +72,9 @@ void
 rbug_resource_destroy(struct rbug_resource *rb_resource)
 {
struct rbug_screen *rb_screen = rbug_screen(rb_resource-base.screen);
-   rbug_screen_remove_from_list(rb_screen, resources, rb_resource);
+
+   if (rb_resource-base.target != PIPE_BUFFER)
+  rbug_screen_remove_from_list(rb_screen, resources, rb_resource);
 
pipe_resource_reference(rb_resource-resource, NULL);
FREE(rb_resource);
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] rbug: remove contexts from the list properly

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/rbug/rbug_context.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/rbug/rbug_context.c 
b/src/gallium/drivers/rbug/rbug_context.c
index 62fe543..ca94590 100644
--- a/src/gallium/drivers/rbug/rbug_context.c
+++ b/src/gallium/drivers/rbug/rbug_context.c
@@ -40,10 +40,12 @@
 static void
 rbug_destroy(struct pipe_context *_pipe)
 {
+   struct rbug_screen *rb_screen = rbug_screen(_pipe-screen);
struct rbug_context *rb_pipe = rbug_context(_pipe);
struct pipe_context *pipe = rb_pipe-pipe;
 
-   remove_from_list(rb_pipe-list);
+   rbug_screen_remove_from_list(rb_screen, contexts, rb_pipe);
+
pipe_mutex_lock(rb_pipe-call_mutex);
pipe-destroy(pipe);
rb_pipe-pipe = NULL;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] rbug: fix crash in set_vertex_buffers

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/rbug/rbug_context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/rbug/rbug_context.c 
b/src/gallium/drivers/rbug/rbug_context.c
index ca94590..d6fca2e 100644
--- a/src/gallium/drivers/rbug/rbug_context.c
+++ b/src/gallium/drivers/rbug/rbug_context.c
@@ -758,7 +758,7 @@ rbug_set_vertex_buffers(struct pipe_context *_pipe,
 
pipe_mutex_lock(rb_pipe-call_mutex);
 
-   if (num_buffers) {
+   if (num_buffers  _buffers) {
   memcpy(unwrapped_buffers, _buffers, num_buffers * sizeof(*_buffers));
   for (i = 0; i  num_buffers; i++)
  unwrapped_buffers[i].buffer = 
rbug_resource_unwrap(_buffers[i].buffer);
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] rbug: send the actual number of layers to the client

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

This sends the correct value for array textures.
---
 src/gallium/drivers/rbug/rbug_core.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/rbug/rbug_core.c 
b/src/gallium/drivers/rbug/rbug_core.c
index c5b26b8..ece5e2f 100644
--- a/src/gallium/drivers/rbug/rbug_core.c
+++ b/src/gallium/drivers/rbug/rbug_core.c
@@ -204,6 +204,7 @@ rbug_texture_info(struct rbug_rbug *tr_rbug, struct 
rbug_header *header, uint32_
struct rbug_proto_texture_info *gpti = (struct rbug_proto_texture_info 
*)header;
struct rbug_list *ptr;
struct pipe_resource *t;
+   unsigned num_layers;
 
pipe_mutex_lock(rb_screen-list_mutex);
foreach(ptr, rb_screen-resources) {
@@ -219,11 +220,13 @@ rbug_texture_info(struct rbug_rbug *tr_rbug, struct 
rbug_header *header, uint32_
}
 
t = tr_tex-resource;
+   num_layers = util_max_layer(t, 0) + 1;
+
rbug_send_texture_info_reply(tr_rbug-con, serial,
t-target, t-format,
t-width0, 1,
t-height0, 1,
-   t-depth0, 1,
+   num_layers, 1,
util_format_get_blockwidth(t-format),
util_format_get_blockheight(t-format),
util_format_get_blocksize(t-format),
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] rbug: implement streamout context functions

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/rbug/rbug_context.c | 46 +
 1 file changed, 46 insertions(+)

diff --git a/src/gallium/drivers/rbug/rbug_context.c 
b/src/gallium/drivers/rbug/rbug_context.c
index d6fca2e..71bc216 100644
--- a/src/gallium/drivers/rbug/rbug_context.c
+++ b/src/gallium/drivers/rbug/rbug_context.c
@@ -803,6 +803,49 @@ rbug_set_sample_mask(struct pipe_context *_pipe,
pipe_mutex_unlock(rb_pipe-call_mutex);
 }
 
+static struct pipe_stream_output_target *
+rbug_create_stream_output_target(struct pipe_context *_pipe,
+ struct pipe_resource *_res,
+ unsigned buffer_offset, unsigned buffer_size)
+{
+   struct rbug_context *rb_pipe = rbug_context(_pipe);
+   struct pipe_context *pipe = rb_pipe-pipe;
+   struct pipe_resource *res = rbug_resource_unwrap(_res);
+   struct pipe_stream_output_target *target;
+
+   pipe_mutex_lock(rb_pipe-call_mutex);
+   target = pipe-create_stream_output_target(pipe, res, buffer_offset,
+  buffer_size);
+   pipe_mutex_unlock(rb_pipe-call_mutex);
+   return target;
+}
+
+static void
+rbug_stream_output_target_destroy(struct pipe_context *_pipe,
+  struct pipe_stream_output_target *target)
+{
+   struct rbug_context *rb_pipe = rbug_context(_pipe);
+   struct pipe_context *pipe = rb_pipe-pipe;
+
+   pipe_mutex_lock(rb_pipe-call_mutex);
+   pipe-stream_output_target_destroy(pipe, target);
+   pipe_mutex_unlock(rb_pipe-call_mutex);
+}
+
+static void
+rbug_set_stream_output_targets(struct pipe_context *_pipe,
+   unsigned num_targets,
+   struct pipe_stream_output_target **targets,
+   const unsigned *offsets)
+{
+   struct rbug_context *rb_pipe = rbug_context(_pipe);
+   struct pipe_context *pipe = rb_pipe-pipe;
+
+   pipe_mutex_lock(rb_pipe-call_mutex);
+   pipe-set_stream_output_targets(pipe, num_targets, targets, offsets);
+   pipe_mutex_unlock(rb_pipe-call_mutex);
+}
+
 static void
 rbug_resource_copy_region(struct pipe_context *_pipe,
   struct pipe_resource *_dst,
@@ -1174,6 +1217,9 @@ rbug_context_create(struct pipe_screen *_screen, struct 
pipe_context *pipe)
rb_pipe-base.set_vertex_buffers = rbug_set_vertex_buffers;
rb_pipe-base.set_index_buffer = rbug_set_index_buffer;
rb_pipe-base.set_sample_mask = rbug_set_sample_mask;
+   rb_pipe-base.create_stream_output_target = 
rbug_create_stream_output_target;
+   rb_pipe-base.stream_output_target_destroy = 
rbug_stream_output_target_destroy;
+   rb_pipe-base.set_stream_output_targets = rbug_set_stream_output_targets;
rb_pipe-base.resource_copy_region = rbug_resource_copy_region;
rb_pipe-base.blit = rbug_blit;
rb_pipe-base.flush_resource = rbug_flush_resource;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] i965: Enable instruction compaction on Gen8+.

2014-08-18 Thread Chris Forbes
s/Gen8+/Gen8/ in the commit message, since it's still disabled for future gens.

On Tue, Aug 19, 2014 at 6:19 AM, Matt Turner matts...@gmail.com wrote:
 ---
  src/mesa/drivers/dri/i965/brw_eu_compact.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
 b/src/mesa/drivers/dri/i965/brw_eu_compact.c
 index 727fef5..280d7f7 100644
 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
 +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
 @@ -1101,7 +1101,7 @@ brw_compact_instructions(struct brw_compile *p, int 
 start_offset,
  */
 int old_ip[(p-next_insn_offset - start_offset) / 8];

 -   if (brw-gen  6 || brw-gen = 8)
 +   if (brw-gen  6 || brw-gen  8)
return;

 int src_offset;
 --
 1.8.5.5

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/7] i965: Update JIP/UIP compaction code to operate on bytes.

2014-08-18 Thread Kenneth Graunke
On Monday, August 18, 2014 11:19:48 AM Matt Turner wrote:
 JIP/UIP were previously in units of compacted instructions. On Gen8
 they're in units of bytes.
 ---
  src/mesa/drivers/dri/i965/brw_eu_compact.c | 10 ++
  1 file changed, 6 insertions(+), 4 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
 b/src/mesa/drivers/dri/i965/brw_eu_compact.c
 index 25a96e7..f100297 100644
 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
 +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
 @@ -653,17 +653,19 @@ static void
  update_uip_jip(struct brw_context *brw, brw_inst *insn,
 int this_old_ip, int *compacted_counts)
  {
 -   int jip = brw_inst_jip(brw, insn);
 +   int scale = brw-gen = 8 ? sizeof(brw_compact_inst) : 1;
 +
 +   int32_t jip = brw_inst_jip(brw, insn) / scale;
 jip -= compacted_between(this_old_ip, this_old_ip + jip, 
 compacted_counts);
 -   brw_inst_set_jip(brw, insn, jip);
 +   brw_inst_set_jip(brw, insn, jip * scale);
  
 if (brw_inst_opcode(brw, insn) == BRW_OPCODE_ENDIF ||
 brw_inst_opcode(brw, insn) == BRW_OPCODE_WHILE)
return;
  
 -   int uip = brw_inst_uip(brw, insn);
 +   int32_t uip = brw_inst_uip(brw, insn) / scale;
 uip -= compacted_between(this_old_ip, this_old_ip + uip, 
 compacted_counts);
 -   brw_inst_set_uip(brw, insn, uip);
 +   brw_inst_set_uip(brw, insn, uip * scale);
  }
  
  void
 

This originally confused me a bit, but I believe it's correct.
Here, your local variable jip is the jump distance in units of number of 
compact instructions.  So, for Broadwell, you convert from bytes to that, 
subtract some number of compact instructions, and scale back up.

You could instead do:

   int32_t jip = brw_inst_jip(brw, insn);
   jip -= scale *
  compacted_between(this_old_ip, this_old_ip + jip, compacted_counts);
   brw_inst_set_jip(brw, insn, jip);

which is a bit less frobbing around and unit conversions.

You could also do:

   int scale = brw_jump_scale(brw) / 2;

if you wanted.  I'll leave it up to you which style you prefer.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org




signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/12] i965: Implement fast color clears using meta operations

2014-08-18 Thread Chris Forbes
Ken,

It would be nice to get that patch [1] in fairly soon -- master is
currently unusable.

[1]  
http://cgit.freedesktop.org/~kwg/mesa/commit/?h=texturelock&id=51b6879849f1efcfb28a45a63c2230ad0b2292e7

Consider it:

Reviewed-and-tested-by: Chris Forbes chr...@ijw.co.nz

On Mon, Aug 18, 2014 at 9:36 AM, Chris Forbes chr...@ijw.co.nz wrote:
 Yes, that fixes it.

 On Mon, Aug 18, 2014 at 9:01 AM, Kristian Høgsberg hoegsb...@gmail.com 
 wrote:
 On Sun, Aug 17, 2014 at 11:36:55PM +1200, Chris Forbes wrote:
 This commit (2f28a0dc2 on master) causes various apps (at least
 glxgears & vlc) to render garbage on my HSW GT3e. There are regular
 vertical bands of black pixels; on some frames, a few blocks of pixels
 within those bands are present; on others, not.

 Is that fixed by

   
 http://cgit.freedesktop.org/~kwg/mesa/commit/?h=texturelock&id=51b6879849f1efcfb28a45a63c2230ad0b2292e7

 ?

 The docs say "When performing a render target resolve, PIPE_CONTROL with
 end of pipe sync must be delivered.", which doesn't make it clear whether
 it's before or after.  A RC flush before doing the resolve certainly makes
 sense, since you'd expect the resolve operation to have to read back from
 the MCS.

 Kristian


 On Tue, Aug 12, 2014 at 5:45 PM, Kristian Høgsberg hoegsb...@gmail.com 
 wrote:
  On Mon, Aug 11, 2014 at 08:46:23PM -0400, Ilia Mirkin wrote:
  On Mon, Aug 11, 2014 at 8:29 PM, Kristian Høgsberg k...@bitplanet.net 
  wrote:
   diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c 
   b/src/mesa/drivers/dri/i965/intel_tex_copy.c
   index 97f1569..2456080 100644
   --- a/src/mesa/drivers/dri/i965/intel_tex_copy.c
   +++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
   @@ -79,6 +79,8 @@ intel_copy_texsubimage(struct brw_context *brw,
   int dst_slice = slice + intelImage-base.Base.Face +
   intelImage-base.Base.TexObject-MinLayer;
  
   +   _mesa_unlock_texture(brw-ctx, intelImage-base.Base.TexObject);
   +
   /* blit from src buffer to texture */
   if (!intel_miptree_blit(brw,
   irb-mt, irb-mt_level, irb-mt_layer,
   @@ -89,6 +91,8 @@ intel_copy_texsubimage(struct brw_context *brw,
  return false;
   }
  
   +   _mesa_lock_texture(brw-ctx, intelImage-base.Base.TexObject);
   +
   return true;
}
 
  Just happened to notice in some aimless scrolling... you return above,
  so in one path you don't relock the texture, but in the other you do.
  Usually that sort of thing leads to trouble, but perhaps there's
  something going on here which makes it OK. If that's the case,
  probably deserves a comment.
 
  Nope, my bad.  Thanks for catching that.
 
  Kristian
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] i965: Enable instruction compaction on Gen8+.

2014-08-18 Thread Kenneth Graunke
On Monday, August 18, 2014 11:19:53 AM Matt Turner wrote:
 ---
  src/mesa/drivers/dri/i965/brw_eu_compact.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
 b/src/mesa/drivers/dri/i965/brw_eu_compact.c
 index 727fef5..280d7f7 100644
 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
 +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
 @@ -1101,7 +1101,7 @@ brw_compact_instructions(struct brw_compile *p, int 
 start_offset,
  */
 int old_ip[(p-next_insn_offset - start_offset) / 8];
  
 -   if (brw-gen  6 || brw-gen = 8)
 +   if (brw-gen  6 || brw-gen  8)
return;
  
 int src_offset;
 

I see no reason why this wouldn't work on Skylake, so I'd just go ahead and do:

if (brw-gen  6)
   return;

With that change, this is:
Reviewed-by: Kenneth Graunke kenn...@whitecape.org

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] i965: Reverse condition ordering to let us support other gens.

2014-08-18 Thread Kenneth Graunke
On Monday, August 18, 2014 11:19:47 AM Matt Turner wrote:
 ---
  src/mesa/drivers/dri/i965/brw_eu_compact.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
 b/src/mesa/drivers/dri/i965/brw_eu_compact.c
 index 625cfbb..25a96e7 100644
 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
 +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
 @@ -789,14 +789,14 @@ brw_compact_instructions(struct brw_compile *p, int 
 start_offset,
case BRW_OPCODE_ELSE:
case BRW_OPCODE_ENDIF:
case BRW_OPCODE_WHILE:
 - if (brw-gen == 6) {
 + if (brw-gen = 7) {
 +update_uip_jip(brw, insn, this_old_ip, compacted_counts);
 + } else if (brw-gen == 6) {
  int gen6_jump_count = brw_inst_gen6_jump_count(brw, insn);
  target_old_ip = this_old_ip + gen6_jump_count;
  target_compacted_count = compacted_counts[target_old_ip];
  gen6_jump_count -= (target_compacted_count - 
 this_compacted_count);
  brw_inst_set_gen6_jump_count(brw, insn, gen6_jump_count);
 - } else {
 -update_uip_jip(brw, insn, this_old_ip, compacted_counts);
   }
   break;
}
 

This isn't necessary - gen >= 8 would've failed the gen == 6 check and hit the 
else case.  You've just swapped them around for readability...which I like.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] radeonsi: use r600_draw_rectangle from r600g

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

Rectangles are easier than triangles for the rasterizer.
---
 src/gallium/drivers/r600/r600_blit.c  |  1 -
 src/gallium/drivers/r600/r600_pipe.c  |  1 -
 src/gallium/drivers/r600/r600_pipe.h  |  4 --
 src/gallium/drivers/r600/r600_state_common.c  | 64 ---
 src/gallium/drivers/radeon/r600_pipe_common.c | 64 +++
 src/gallium/drivers/radeon/r600_pipe_common.h |  8 
 src/gallium/drivers/radeonsi/si_blit.c|  1 -
 src/gallium/drivers/radeonsi/si_pipe.c|  2 +-
 src/gallium/drivers/radeonsi/si_state_draw.c  |  7 +--
 9 files changed, 77 insertions(+), 75 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_blit.c 
b/src/gallium/drivers/r600/r600_blit.c
index c98206f..a3cfdae 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -22,7 +22,6 @@
  */
 #include r600_pipe.h
 #include util/u_surface.h
-#include util/u_blitter.h
 #include util/u_format.h
 #include evergreend.h
 
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 4543347..226ad6e 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -30,7 +30,6 @@
 
 #include errno.h
 #include pipe/p_shader_tokens.h
-#include util/u_blitter.h
 #include util/u_debug.h
 #include util/u_memory.h
 #include util/u_simple_shaders.h
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index d04fef8..ee836b7 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -32,7 +32,6 @@
 #include r600_llvm.h
 #include r600_public.h
 
-#include util/u_blitter.h
 #include util/u_suballoc.h
 #include util/u_double_list.h
 #include util/u_transfer.h
@@ -633,9 +632,6 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
 void r600_sampler_states_dirty(struct r600_context *rctx,
   struct r600_sampler_states *state);
 void r600_constant_buffers_dirty(struct r600_context *rctx, struct 
r600_constbuf_state *state);
-void r600_draw_rectangle(struct blitter_context *blitter,
-int x1, int y1, int x2, int y2, float depth,
-enum blitter_attrib_type type, const union 
pipe_color_union *attrib);
 uint32_t r600_translate_stencil_op(int s_op);
 uint32_t r600_translate_fill(uint32_t func);
 unsigned r600_tex_wrap(unsigned wrap);
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index d29e137..d2f0d17 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -28,7 +28,6 @@
 #include r600_shader.h
 #include r600d.h
 
-#include util/u_draw_quad.h
 #include util/u_format_s3tc.h
 #include util/u_index_modify.h
 #include util/u_memory.h
@@ -36,8 +35,6 @@
 #include util/u_math.h
 #include tgsi/tgsi_parse.h
 
-#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX
-
 void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw)
 {
assert(!cb-buf);
@@ -1550,67 +1547,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, 
const struct pipe_draw_info
rctx-b.num_draw_calls++;
 }
 
-void r600_draw_rectangle(struct blitter_context *blitter,
-int x1, int y1, int x2, int y2, float depth,
-enum blitter_attrib_type type, const union 
pipe_color_union *attrib)
-{
-   struct r600_context *rctx = (struct 
r600_context*)util_blitter_get_pipe(blitter);
-   struct pipe_viewport_state viewport;
-   struct pipe_resource *buf = NULL;
-   unsigned offset = 0;
-   float *vb;
-
-   if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) {
-   util_blitter_draw_rectangle(blitter, x1, y1, x2, y2, depth, 
type, attrib);
-   return;
-   }
-
-   /* Some operations (like color resolve on r6xx) don't work
-* with the conventional primitive types.
-* One that works is PT_RECTLIST, which we use here. */
-
-   /* setup viewport */
-   viewport.scale[0] = 1.0f;
-   viewport.scale[1] = 1.0f;
-   viewport.scale[2] = 1.0f;
-   viewport.scale[3] = 1.0f;
-   viewport.translate[0] = 0.0f;
-   viewport.translate[1] = 0.0f;
-   viewport.translate[2] = 0.0f;
-   viewport.translate[3] = 0.0f;
-   rctx-b.b.set_viewport_states(rctx-b.b, 0, 1, viewport);
-
-   /* Upload vertices. The hw rectangle has only 3 vertices,
-* I guess the 4th one is derived from the first 3.
-* The vertex specification should match u_blitter's vertex element 
state. */
-   u_upload_alloc(rctx-b.uploader, 0, sizeof(float) * 24, offset, buf, 
(void**)vb);
-   vb[0] = x1;
-   vb[1] = y1;
-   vb[2] = depth;
-   vb[3] = 1;
-
-   vb[8] = x1;
-   vb[9] = y2;
-   vb[10] = depth;
-   vb[11] = 1;
-
-   vb[16] = x2;
-   vb[17] = y1;
-

[Mesa-dev] [PATCH 2/6] gallium/u_blitter: don't use an empty fragment shader if there's a colorbuffer

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

This is custom code used by some drivers.
---
 src/gallium/auxiliary/util/u_blitter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/util/u_blitter.c 
b/src/gallium/auxiliary/util/u_blitter.c
index 20fbd80..609e02f 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -1799,7 +1799,7 @@ void util_blitter_custom_depth_stencil(struct 
blitter_context *blitter,
pipe-bind_blend_state(pipe, cbsurf ? ctx-blend[PIPE_MASK_RGBA] :
  ctx-blend[0]);
pipe-bind_depth_stencil_alpha_state(pipe, dsa_stage);
-   ctx-bind_fs_state(pipe, ctx-fs_empty);
+   ctx-bind_fs_state(pipe, cbsurf ? ctx-fs_write_one_cbuf : ctx-fs_empty);
pipe-bind_vertex_elements_state(pipe, ctx-velem_state);
 
/* set a framebuffer state */
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] radeonsi: save scissor state and sample mask for u_blitter

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

Cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/radeonsi/si_blit.c  |  7 +++
 src/gallium/drivers/radeonsi/si_state.c | 16 ++--
 src/gallium/drivers/radeonsi/si_state.h | 14 --
 3 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index bc31dfd..9a7a2fe 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -59,9 +59,16 @@ static void si_blitter_begin(struct pipe_context *ctx, enum 
si_blitter_op op)
util_blitter_save_geometry_shader(sctx-blitter, sctx-gs_shader);
util_blitter_save_vertex_shader(sctx-blitter, sctx-vs_shader);
util_blitter_save_vertex_elements(sctx-blitter, sctx-vertex_elements);
+   if (sctx-queued.named.sample_mask) {
+   util_blitter_save_sample_mask(sctx-blitter,
+ 
sctx-queued.named.sample_mask-sample_mask);
+   }
if (sctx-queued.named.viewport) {
util_blitter_save_viewport(sctx-blitter, 
sctx-queued.named.viewport-viewport);
}
+   if (sctx-queued.named.scissor) {
+   util_blitter_save_scissor(sctx-blitter, 
sctx-queued.named.scissor-scissor);
+   }
util_blitter_save_vertex_buffer_slot(sctx-blitter, 
sctx-vertex_buffer);
util_blitter_save_so_targets(sctx-blitter, 
sctx-b.streamout.num_targets,
 (struct 
pipe_stream_output_target**)sctx-b.streamout.targets);
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 98c19d6..fc928f3 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -458,18 +458,20 @@ static void si_set_scissor_states(struct pipe_context 
*ctx,
   const struct pipe_scissor_state *state)
 {
struct si_context *sctx = (struct si_context *)ctx;
-   struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
+   struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor);
+   struct si_pm4_state *pm4 = scissor-pm4;
 
-   if (pm4 == NULL)
+   if (scissor == NULL)
return;
 
+   scissor-scissor = *state;
si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
   S_028250_TL_X(state-minx) | S_028250_TL_Y(state-miny) |
   S_028250_WINDOW_OFFSET_DISABLE(1));
si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR,
   S_028254_BR_X(state-maxx) | S_028254_BR_Y(state-maxy));
 
-   si_pm4_set_state(sctx, scissor, pm4);
+   si_pm4_set_state(sctx, scissor, scissor);
 }
 
 static void si_set_viewport_states(struct pipe_context *ctx,
@@ -2774,16 +2776,18 @@ static void si_bind_sampler_states(struct pipe_context 
*ctx, unsigned shader,
 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
 {
struct si_context *sctx = (struct si_context *)ctx;
-   struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
+   struct si_state_sample_mask *state = 
CALLOC_STRUCT(si_state_sample_mask);
+   struct si_pm4_state *pm4 = state-pm4;
uint16_t mask = sample_mask;
 
-if (pm4 == NULL)
+if (state == NULL)
 return;
 
+   state-sample_mask = mask;
si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask  
16));
si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask  
16));
 
-   si_pm4_set_state(sctx, sample_mask, pm4);
+   si_pm4_set_state(sctx, sample_mask, state);
 }
 
 static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index 82bea79..ce18a27 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -38,6 +38,16 @@ struct si_state_blend {
boolalpha_to_one;
 };
 
+struct si_state_sample_mask {
+   struct si_pm4_state pm4;
+   uint16_tsample_mask;
+};
+
+struct si_state_scissor {
+   struct si_pm4_state pm4;
+   struct pipe_scissor_state   scissor;
+};
+
 struct si_state_viewport {
struct si_pm4_state pm4;
struct pipe_viewport_state  viewport;
@@ -82,8 +92,8 @@ union si_state {
struct si_state_blend   *blend;
struct si_pm4_state *blend_color;
struct si_pm4_state *clip;
-   struct si_pm4_state *sample_mask;
-   struct si_pm4_state *scissor;
+   struct si_state_sample_mask *sample_mask;
+   struct si_state_scissor *scissor;
struct si_state_viewport*viewport;
 

[Mesa-dev] [PATCH 3/6] radeonsi: don't set CB_SHADER_MASK=1 if there are no color outputs

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

This hack isn't needed anymore because of the previous u_blitter commit.
---
 src/gallium/drivers/radeonsi/si_shader.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 3fcd314..08ba8b0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1514,10 +1514,6 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
last_args[6]= uint-zero;
last_args[7]= uint-zero;
last_args[8]= uint-zero;
-
-   si_shader_ctx-shader-spi_shader_col_format |=
-   V_028714_SPI_SHADER_32_ABGR;
-   si_shader_ctx-shader-cb_shader_mask |= 
S_02823C_OUTPUT0_ENABLE(0xf);
}
 
/* Specify whether the EXEC mask represents the valid mask */
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] radeonsi: simplify si_num_banks function

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

This makes it easier to use.
---
 src/gallium/drivers/radeonsi/si_dma.c   |  6 ++
 src/gallium/drivers/radeonsi/si_state.c | 19 ++-
 src/gallium/drivers/radeonsi/si_state.h |  3 +--
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_dma.c 
b/src/gallium/drivers/radeonsi/si_dma.c
index e908746..a69f533 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -173,8 +173,7 @@ static void si_dma_copy_tile(struct si_context *ctx,
tile_split = cik_tile_split(rsrc-surface.tile_split);
tile_mode_index = si_tile_mode_index(rsrc, src_level,
 
util_format_has_stencil(util_format_description(src-format)));
-   nbanks = si_num_banks(sscreen, rsrc-surface.bpe, 
rsrc-surface.tile_split,
- tile_mode_index);
+   nbanks = si_num_banks(sscreen, rsrc);
base += rsrc-resource.gpu_address;
addr += rdst-resource.gpu_address;
} else {
@@ -202,8 +201,7 @@ static void si_dma_copy_tile(struct si_context *ctx,
tile_split = cik_tile_split(rdst-surface.tile_split);
tile_mode_index = si_tile_mode_index(rdst, dst_level,
 
util_format_has_stencil(util_format_description(dst-format)));
-   nbanks = si_num_banks(sscreen, rdst-surface.bpe, 
rdst-surface.tile_split,
- tile_mode_index);
+   nbanks = si_num_banks(sscreen, rdst);
base += rdst-resource.gpu_address;
addr += rsrc-resource.gpu_address;
}
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index fc928f3..4ab2b8b 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -47,15 +47,14 @@ static void si_init_atom(struct r600_atom *atom, struct 
r600_atom **list_elem,
*list_elem = atom;
 }
 
-uint32_t si_num_banks(struct si_screen *sscreen, unsigned bpe, unsigned 
tile_split,
- unsigned tile_mode_index)
+uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
 {
-   if ((sscreen-b.chip_class == CIK) 
+   if (sscreen-b.chip_class == CIK 
sscreen-b.info.cik_macrotile_mode_array_valid) {
unsigned index, tileb;
 
-   tileb = 8 * 8 * bpe;
-   tileb = MIN2(tile_split, tileb);
+   tileb = 8 * 8 * tex-surface.bpe;
+   tileb = MIN2(tex-surface.tile_split, tileb);
 
for (index = 0; tileb  64; index++) {
tileb = 1;
@@ -65,11 +64,14 @@ uint32_t si_num_banks(struct si_screen *sscreen, unsigned 
bpe, unsigned tile_spl
return (sscreen-b.info.cik_macrotile_mode_array[index]  6)  
0x3;
}
 
-   if ((sscreen-b.chip_class == SI) 
+   if (sscreen-b.chip_class == SI 
sscreen-b.info.si_tile_mode_array_valid) {
+   /* Don't use stencil_tiling_index, because num_banks is always
+* read from the depth mode. */
+   unsigned tile_mode_index = tex-surface.tiling_index[0];
assert(tile_mode_index  32);
 
-   return (sscreen-b.info.si_tile_mode_array[tile_mode_index]  
20)  0x3;
+   return 
G_009910_NUM_BANKS(sscreen-b.info.si_tile_mode_array[tile_mode_index]);
}
 
/* The old way. */
@@ -1820,8 +1822,7 @@ static void si_init_depth_surface(struct si_context *sctx,
macro_aspect = cik_macro_tile_aspect(macro_aspect);
bankw = cik_bank_wh(bankw);
bankh = cik_bank_wh(bankh);
-   nbanks = si_num_banks(sscreen, rtex-surface.bpe, 
rtex-surface.tile_split,
- ~0);
+   nbanks = si_num_banks(sscreen, rtex);
tile_mode_index = si_tile_mode_index(rtex, level, false);
pipe_config = cik_db_pipe_config(sscreen, tile_mode_index);
 
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index ce18a27..7362ad1 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -263,8 +263,7 @@ unsigned cik_bank_wh(unsigned bankwh);
 unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode);
 unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect);
 unsigned cik_tile_split(unsigned tile_split);
-uint32_t si_num_banks(struct si_screen *sscreen, unsigned bpe, unsigned 
tile_split,
- unsigned tile_mode_index);
+uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex);
 unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool 
stencil);
 
 /* si_state_draw.c */

[Mesa-dev] [PATCH 1/6] gallium/util: handle PIPE_BUFFER in util_pipe_tex_to_tgsi_tex

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/auxiliary/util/u_inlines.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_inlines.h 
b/src/gallium/auxiliary/util/u_inlines.h
index e952615..c80ec48 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -565,6 +565,9 @@ util_pipe_tex_to_tgsi_tex(enum pipe_texture_target 
pipe_tex_target,
   unsigned nr_samples)
 {
switch (pipe_tex_target) {
+   case PIPE_BUFFER:
+  return TGSI_TEXTURE_BUFFER;
+
case PIPE_TEXTURE_1D:
   assert(nr_samples = 1);
   return TGSI_TEXTURE_1D;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] radeonsi: set IA_MULTI_VGT_PARAM on SI the same as on CIK (v2)

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

Nothing's changed for CIK here.
---
 src/gallium/drivers/radeonsi/si_state.c  |  6 --
 src/gallium/drivers/radeonsi/si_state_draw.c | 90 +++-
 2 files changed, 50 insertions(+), 46 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 3d1e02a..0c6f62a 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3110,12 +3110,6 @@ void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0);
 
si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
-   if (sctx-b.chip_class == SI) {
-   si_pm4_set_reg(pm4, R_028AA8_IA_MULTI_VGT_PARAM,
-  S_028AA8_SWITCH_ON_EOP(1) |
-  S_028AA8_PARTIAL_VS_WAVE_ON(1) |
-  S_028AA8_PRIMGROUP_SIZE(63));
-   }
si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x);
si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
if (sctx-b.chip_class  CIK)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index 573487c..2e999f6 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -379,6 +379,53 @@ static unsigned si_conv_prim_to_gs_out(unsigned mode)
return prim_conv[mode];
 }
 
+static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
+ const struct pipe_draw_info *info)
+{
+   struct si_state_rasterizer *rs = sctx-queued.named.rasterizer;
+   unsigned prim = info-mode;
+   unsigned primgroup_size = 64;
+
+   /* SWITCH_ON_EOP(0) is always preferable. */
+   bool wd_switch_on_eop = false;
+   bool ia_switch_on_eop = false;
+
+   /* This is a hardware requirement. */
+   if ((rs  rs-line_stipple_enable) ||
+   (sctx-b.screen-debug_flags  DBG_SWITCH_ON_EOP)) {
+   ia_switch_on_eop = true;
+   wd_switch_on_eop = true;
+   }
+
+   if (sctx-b.chip_class = CIK) {
+   /* WD_SWITCH_ON_EOP has no effect on GPUs with less than
+* 4 shader engines. Set 1 to pass the assertion below.
+* The other cases are hardware requirements. */
+   if (sctx-b.screen-info.max_se  4 ||
+   prim == PIPE_PRIM_POLYGON ||
+   prim == PIPE_PRIM_LINE_LOOP ||
+   prim == PIPE_PRIM_TRIANGLE_FAN ||
+   prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY ||
+   info-primitive_restart)
+   wd_switch_on_eop = true;
+
+   /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP 
is 0.
+* We don't know that for indirect drawing, so treat it as
+* always problematic. */
+   if (sctx-b.family == CHIP_HAWAII 
+   (info-indirect || info-instance_count  1))
+   wd_switch_on_eop = true;
+
+   /* If the WD switch is false, the IA switch must be false too. 
*/
+   assert(wd_switch_on_eop || !ia_switch_on_eop);
+   }
+
+   return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
+   S_028AA8_PARTIAL_VS_WAVE_ON(1) |
+   S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
+   S_028AA8_WD_SWITCH_ON_EOP(sctx-b.chip_class = CIK ? 
wd_switch_on_eop : 0);
+}
+
 static bool si_update_draw_info_state(struct si_context *sctx,
  const struct pipe_draw_info *info,
  const struct pipe_index_buffer *ib)
@@ -391,6 +438,7 @@ static bool si_update_draw_info_state(struct si_context 
*sctx,
   
sctx-gs_shader-current-shader.gs_output_prim :
   info-mode);
unsigned ls_mask = 0;
+   unsigned ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info);
 
if (pm4 == NULL)
return false;
@@ -401,55 +449,17 @@ static bool si_update_draw_info_state(struct si_context 
*sctx,
}
 
if (sctx-b.chip_class = CIK) {
-   struct si_state_rasterizer *rs = sctx-queued.named.rasterizer;
-   unsigned primgroup_size = 64;
-
-   /* SWITCH_ON_EOP(0) is always preferable. */
-   bool wd_switch_on_eop = false;
-   bool ia_switch_on_eop = false;
-
-   /* WD_SWITCH_ON_EOP has no effect on GPUs with less than
-* 4 shader engines. Set 1 to pass the assertion below.
-* The other cases are hardware requirements. */
-   if (sctx-b.screen-info.max_se  4 ||
-   prim == V_008958_DI_PT_POLYGON ||
-   prim == V_008958_DI_PT_LINELOOP ||
-   prim == 

[Mesa-dev] [PATCH 3/4] radeonsi: bump PRIMGROUP_SIZE for some cases

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

Recommended by hw people.
---
 src/gallium/drivers/radeonsi/si_state_draw.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index f5d6550..0f700a8 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -384,13 +384,16 @@ static unsigned si_get_ia_multi_vgt_param(struct 
si_context *sctx,
 {
struct si_state_rasterizer *rs = sctx-queued.named.rasterizer;
unsigned prim = info-mode;
-   unsigned primgroup_size = 64;
+   unsigned primgroup_size = 128; /* recommended without a GS */
 
/* SWITCH_ON_EOP(0) is always preferable. */
bool wd_switch_on_eop = false;
bool ia_switch_on_eop = false;
bool partial_vs_wave = false;
 
+   if (sctx-gs_shader)
+   primgroup_size = 64; /* recommended with a GS */
+
/* This is a hardware requirement. */
if ((rs  rs-line_stipple_enable) ||
(sctx-b.screen-debug_flags  DBG_SWITCH_ON_EOP)) {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] r600g: copy IA_MULTI_VGT_PARAM programming from radeonsi for Cayman

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/r600/evergreen_state.c   |  2 --
 src/gallium/drivers/r600/r600_pipe.h |  2 +-
 src/gallium/drivers/r600/r600_state_common.c | 24 
 src/gallium/drivers/r600/r600d.h | 11 +++
 4 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index e6e9f49..841ad0c 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2243,8 +2243,6 @@ static void cayman_init_atom_start_cs(struct r600_context 
*rctx)
 
r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3  1) | 1);
 
-   r600_store_context_reg(cb, CM_R_028AA8_IA_MULTI_VGT_PARAM, 
S_028AA8_SWITCH_ON_EOP(1) | S_028AA8_PARTIAL_VS_WAVE_ON(1) | 
S_028AA8_PRIMGROUP_SIZE(63));
-
r600_store_context_reg_seq(cb, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 
2);
r600_store_value(cb, 0x76543210); /* 
CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0 */
r600_store_value(cb, 0xfedcba98); /* 
CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1 */
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index ee836b7..e277269 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -40,7 +40,7 @@
 
 /* the number of CS dwords for flushing and drawing */
 #define R600_MAX_FLUSH_CS_DWORDS   16
-#define R600_MAX_DRAW_CS_DWORDS37
+#define R600_MAX_DRAW_CS_DWORDS40
 #define R600_TRACE_CS_DWORDS   7
 
 #define R600_MAX_USER_CONST_BUFFERS 13
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index d2f0d17..7594d0e 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1418,6 +1418,30 @@ static void r600_draw_vbo(struct pipe_context *ctx, 
const struct pipe_draw_info
r600_emit_atom(rctx, rctx-atoms[i]);
}
 
+   if (rctx-b.chip_class == CAYMAN) {
+   /* Copied from radeonsi. */
+   unsigned primgroup_size = 128; /* recommended without a GS */
+   bool ia_switch_on_eop = false;
+   bool partial_vs_wave = false;
+
+   if (rctx-gs_shader)
+   primgroup_size = 64; /* recommended with a GS */
+
+   if ((rctx-rasterizer  rctx-rasterizer-pa_sc_line_stipple) 
||
+   (rctx-b.screen-debug_flags  DBG_SWITCH_ON_EOP)) {
+   ia_switch_on_eop = true;
+   }
+
+   if (rctx-b.streamout.streamout_enabled ||
+   rctx-b.streamout.prims_gen_query_enabled)
+   partial_vs_wave = true;
+
+   r600_write_context_reg(cs, CM_R_028AA8_IA_MULTI_VGT_PARAM,
+  S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) 
|
+  
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
+  S_028AA8_PRIMGROUP_SIZE(primgroup_size - 
1));
+   }
+
/* On R6xx, CULL_FRONT=1 culls all points, lines, and rectangles,
 * even though it should have no effect on those. */
if (rctx-b.chip_class == R600  rctx-rasterizer) {
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 8405fbb..17568ab 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -3747,6 +3747,17 @@
 #define SQ_TEX_INST_SAMPLE_C_G_LB  0x1E
 #define SQ_TEX_INST_SAMPLE_C_G_LZ  0x1F
 
+#define CM_R_028AA8_IA_MULTI_VGT_PARAM0x028AA8
+#define   S_028AA8_PRIMGROUP_SIZE(x)   (((x)  0x)  0)
+#define   G_028AA8_PRIMGROUP_SIZE(x)   (((x)  0)  0x)
+#define   C_028AA8_PRIMGROUP_SIZE  0x
+#define   S_028AA8_PARTIAL_VS_WAVE_ON(x)   (((x)  0x1)  16)
+#define   G_028AA8_PARTIAL_VS_WAVE_ON(x)   (((x)  16)  0x1)
+#define   C_028AA8_PARTIAL_VS_WAVE_ON  0xFFFE
+#define   S_028AA8_SWITCH_ON_EOP(x)(((x)  0x1)  17)
+#define   G_028AA8_SWITCH_ON_EOP(x)(((x)  17)  0x1)
+#define   C_028AA8_SWITCH_ON_EOP   0xFFFD
+
 /* async DMA packets */
 #define DMA_PACKET(cmd, t, s, n)   cmd)  0xF)  28) |\
(((t)  0x1)  23) |   \
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] radeonsi: set PARTIAL_VS_WAVE(0) when appropriate

2014-08-18 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/radeonsi/si_state_draw.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index 2e999f6..f5d6550 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -389,6 +389,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context 
*sctx,
/* SWITCH_ON_EOP(0) is always preferable. */
bool wd_switch_on_eop = false;
bool ia_switch_on_eop = false;
+   bool partial_vs_wave = false;
 
/* This is a hardware requirement. */
if ((rs  rs-line_stipple_enable) ||
@@ -397,6 +398,10 @@ static unsigned si_get_ia_multi_vgt_param(struct 
si_context *sctx,
wd_switch_on_eop = true;
}
 
+   if (sctx-b.streamout.streamout_enabled ||
+   sctx-b.streamout.prims_gen_query_enabled)
+   partial_vs_wave = true;
+
if (sctx-b.chip_class = CIK) {
/* WD_SWITCH_ON_EOP has no effect on GPUs with less than
 * 4 shader engines. Set 1 to pass the assertion below.
@@ -421,7 +426,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context 
*sctx,
}
 
return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
-   S_028AA8_PARTIAL_VS_WAVE_ON(1) |
+   S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
S_028AA8_WD_SWITCH_ON_EOP(sctx-b.chip_class = CIK ? 
wd_switch_on_eop : 0);
 }
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 1/6] mesa: add ARB_conditional_render_inverted flags

2014-08-18 Thread Tobias Klausmann
Also add an extension bit so that the extension can be enabled safely.

Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
---
 src/mesa/main/condrender.c | 10 --
 src/mesa/main/extensions.c |  1 +
 src/mesa/main/mtypes.h |  1 +
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/condrender.c b/src/mesa/main/condrender.c
index 0ad1e5c2..90ae566 100644
--- a/src/mesa/main/condrender.c
+++ b/src/mesa/main/condrender.c
@@ -77,8 +77,14 @@ _mesa_BeginConditionalRender(GLuint queryId, GLenum mode)
case GL_QUERY_NO_WAIT:
case GL_QUERY_BY_REGION_WAIT:
case GL_QUERY_BY_REGION_NO_WAIT:
-  /* OK */
-  break;
+  break; /* OK */
+   case GL_QUERY_WAIT_INVERTED:
+   case GL_QUERY_NO_WAIT_INVERTED:
+   case GL_QUERY_BY_REGION_WAIT_INVERTED:
+   case GL_QUERY_BY_REGION_NO_WAIT_INVERTED:
+   if (ctx-Extensions.ARB_conditional_render_inverted)
+  break; /* OK */
+/* fallthrough - invalid */
default:
   _mesa_error(ctx, GL_INVALID_ENUM, glBeginConditionalRender(mode=%s),
   _mesa_lookup_enum_by_nr(mode));
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index c5bd7b3..553c01e 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -94,6 +94,7 @@ static const struct extension extension_table[] = {
{ GL_ARB_color_buffer_float,  o(ARB_color_buffer_float),  
GL, 2004 },
{ GL_ARB_compressed_texture_pixel_storage,o(dummy_true),  
GL, 2011 },
{ GL_ARB_compute_shader,  o(ARB_compute_shader),  
GL, 2012 },
+   { GL_ARB_conditional_render_inverted, 
o(ARB_conditional_render_inverted), GL, 2014 },
{ GL_ARB_copy_buffer, o(dummy_true),  
GL, 2008 },
{ GL_ARB_copy_image,  o(ARB_copy_image),  
GL, 2012 },
{ GL_ARB_conservative_depth,  o(ARB_conservative_depth),  
GL, 2011 },
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 97b1ad2..cb2a4df 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3553,6 +3553,7 @@ struct gl_extensions
GLboolean ARB_clear_texture;
GLboolean ARB_color_buffer_float;
GLboolean ARB_compute_shader;
+   GLboolean ARB_conditional_render_inverted;
GLboolean ARB_conservative_depth;
GLboolean ARB_copy_image;
GLboolean ARB_depth_buffer_float;
-- 
1.8.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 4/6] nvc0: Handle ARB_conditional_render_inverted and enable it

2014-08-18 Thread Tobias Klausmann
Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
---
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h |  3 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c   | 61 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c  |  3 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_surface.c |  3 +-
 4 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index ebeb8c4..8ae78e9 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -196,8 +196,9 @@ struct nvc0_context {
unsigned num_tfbbufs;
 
struct pipe_query *cond_query;
-   boolean cond_cond;
+   boolean cond_cond; /* inverted rendering condition */
uint cond_mode;
+   uint32_t cond_condmode; /* the calculated condition */
 
struct nvc0_blitctx *blit;
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index 50cef1e..007f8c4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -542,46 +542,51 @@ nvc0_render_condition(struct pipe_context *pipe,
struct nouveau_pushbuf *push = nvc0-base.pushbuf;
struct nvc0_query *q;
uint32_t cond;
-   boolean negated = FALSE;
boolean wait =
   mode != PIPE_RENDER_COND_NO_WAIT 
   mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
 
+   if (!pq) {
+  cond = NVC0_3D_COND_MODE_ALWAYS;
+   }
+   else {
+  q = nvc0_query(pq);
+  /* NOTE: comparison of 2 queries only works if both have completed */
+  switch (q-type) {
+  case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ cond = condition ? NVC0_3D_COND_MODE_EQUAL :
+  NVC0_3D_COND_MODE_NOT_EQUAL;
+ wait = TRUE;
+ break;
+  case PIPE_QUERY_OCCLUSION_COUNTER:
+  case PIPE_QUERY_OCCLUSION_PREDICATE:
+ if (likely(!condition)) {
+if (unlikely(q-nesting))
+   cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
+ NVC0_3D_COND_MODE_ALWAYS;
+else
+   cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
+ } else {
+cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
+ }
+ break;
+  default:
+ assert(!render condition query not a predicate);
+ cond = NVC0_3D_COND_MODE_ALWAYS;
+ break;
+  }
+   }
+
nvc0-cond_query = pq;
nvc0-cond_cond = condition;
+   nvc0-cond_condmode = cond;
nvc0-cond_mode = mode;
 
if (!pq) {
   PUSH_SPACE(push, 1);
-  IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+  IMMED_NVC0(push, NVC0_3D(COND_MODE), cond);
   return;
}
-   q = nvc0_query(pq);
-
-   /* NOTE: comparison of 2 queries only works if both have completed */
-   switch (q-type) {
-   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-  cond = negated ? NVC0_3D_COND_MODE_EQUAL :
-   NVC0_3D_COND_MODE_NOT_EQUAL;
-  wait = TRUE;
-  break;
-   case PIPE_QUERY_OCCLUSION_COUNTER:
-   case PIPE_QUERY_OCCLUSION_PREDICATE:
-  if (likely(!negated)) {
- if (unlikely(q-nesting))
-cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
-  NVC0_3D_COND_MODE_ALWAYS;
- else
-cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
-  } else {
- cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
-  }
-  break;
-   default:
-  assert(!render condition query not a predicate);
-  mode = NVC0_3D_COND_MODE_ALWAYS;
-  break;
-   }
 
if (wait)
   nvc0_query_fifo_wait(push, pq);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 7c2f11a..84025ef 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -167,13 +167,12 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
   return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
   return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_COMPUTE:
   return (class_3d == NVE4_3D_CLASS) ? 1 : 0;
-   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
- return 0;
 
/* unsupported caps */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index a29f0cc..8aed43b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -1210,6 +1210,7 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct 
pipe_blit_info *info)
int64_t du_dx, dv_dy;
int i;
uint32_t mode;
+

[Mesa-dev] [PATCH v4 3/6] mesa/st: Support ARB_conditional_render_inverted modes

2014-08-18 Thread Tobias Klausmann
Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de
---
 src/mesa/state_tracker/st_cb_condrender.c | 20 +++-
 src/mesa/state_tracker/st_extensions.c|  1 +
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_cb_condrender.c 
b/src/mesa/state_tracker/st_cb_condrender.c
index 8776985..eff1341 100644
--- a/src/mesa/state_tracker/st_cb_condrender.c
+++ b/src/mesa/state_tracker/st_cb_condrender.c
@@ -55,6 +55,8 @@ st_BeginConditionalRender(struct gl_context *ctx, struct 
gl_query_object *q,
struct st_query_object *stq = st_query_object(q);
struct st_context *st = st_context(ctx);
uint m;
+   /* Don't invert the condition for rendering by default */
+   boolean invertedCond = FALSE;
 
st_flush_bitmap_cache(st);
 
@@ -71,12 +73,28 @@ st_BeginConditionalRender(struct gl_context *ctx, struct 
gl_query_object *q,
case GL_QUERY_BY_REGION_NO_WAIT:
   m = PIPE_RENDER_COND_BY_REGION_NO_WAIT;
   break;
+   case GL_QUERY_WAIT_INVERTED:
+  m = PIPE_RENDER_COND_WAIT;
+  invertedCond = TRUE;
+  break;
+   case GL_QUERY_NO_WAIT_INVERTED:
+  m = PIPE_RENDER_COND_NO_WAIT;
+  invertedCond = TRUE;
+  break;
+   case GL_QUERY_BY_REGION_WAIT_INVERTED:
+  m = PIPE_RENDER_COND_BY_REGION_WAIT;
+  invertedCond = TRUE;
+  break;
+   case GL_QUERY_BY_REGION_NO_WAIT_INVERTED:
+  m = PIPE_RENDER_COND_BY_REGION_NO_WAIT;
+  invertedCond = TRUE;
+  break;
default:
   assert(0 && "bad mode in st_BeginConditionalRender");
   m = PIPE_RENDER_COND_WAIT;
}
 
-   cso_set_render_condition(st->cso_context, stq->pq, FALSE, m);
+   cso_set_render_condition(st->cso_context, stq->pq, invertedCond, m);
 }
 
 
diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 24e886c..4110eb5 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -460,6 +460,7 @@ void st_init_extensions(struct pipe_screen *screen,
   { o(ARB_sample_shading),   PIPE_CAP_SAMPLE_SHADING   
},
   { o(ARB_draw_indirect),PIPE_CAP_DRAW_INDIRECT
},
   { o(ARB_derivative_control),   PIPE_CAP_TGSI_FS_FINE_DERIVATIVE  
},
+  { o(ARB_conditional_render_inverted),  
PIPE_CAP_CONDITIONAL_RENDER_INVERTED  },
};
 
/* Required: render target and sampler support */
-- 
1.8.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >