[Mesa-dev] [Bug 82538] Super Maryo Chronicles fails with st/mesa assertion failure
https://bugs.freedesktop.org/show_bug.cgi?id=82538 --- Comment #2 from Michel Dänzer mic...@daenzer.net --- (In reply to comment #1) It works fine for me on Kabini :). Mesa git d7d8260f70326cd294715203dae8a8f0150680c1, llvm 3.5-rc2, I can still reproduce it with current Mesa Git. Does your Mesa build have assertions enabled? smc as Debian package in Sid. Same here, currently version 1.9+git20121121-1.1. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] clover: fix _logs string creation
EdB edb+m...@sigluy.net writes: compact::string is not \0 terminated. size() need to be used for std::string creation --- src/gallium/state_trackers/clover/core/program.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index e09c3aa..3f504d5 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -61,9 +61,9 @@ program::build(const ref_vectordevice devs, const char *opts) { dev.ir_target(), build_opts(dev), log)); _binaries.insert({ dev, module }); -_logs.insert({ dev, std::string(log.c_str()) }); +_logs.insert({ dev, std::string(log.c_str(), log.size()) }); } catch (const build_error ) { -_logs.insert({ dev, std::string(log.c_str()) }); +_logs.insert({ dev, std::string(log.c_str(), log.size()) }); Both of these should just be using the conversion operator. See attachment. throw; } } -- 2.0.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev From 3c2bec6d790e6aa38fb6d71cd495f281205ddf6c Mon Sep 17 00:00:00 2001 From: Francisco Jerez curroje...@riseup.net Date: Mon, 18 Aug 2014 09:05:25 +0300 Subject: [PATCH] clover: Use conversion operator to initialize build log from compat::string. Fixes binary garbage in the compilation logs caused by compat::string::c_str() not being null-terminated (which is a bug on its own that will be fixed in another commit). 
Reported-by: EdB edb+m...@sigluy.net --- src/gallium/state_trackers/clover/core/program.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index 30a1f0e..6c224db 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -61,9 +61,9 @@ program::build(const ref_vectordevice devs, const char *opts) { dev.ir_target(), build_opts(dev), log)); _binaries.insert({ dev, module }); -_logs.insert({ dev, std::string(log.c_str()) }); +_logs.insert({ dev, log }); } catch (const build_error ) { -_logs.insert({ dev, std::string(log.c_str()) }); +_logs.insert({ dev, log }); throw; } } -- 2.0.4 pgpejMi7uG3oD.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] clover: stdify compat::vector a little more
EdB edb+m...@sigluy.net writes: make resize work like std::vector reserve take advantage of capacity rename members to be uniform with other class --- src/gallium/state_trackers/clover/core/module.cpp | 2 +- src/gallium/state_trackers/clover/util/compat.hpp | 113 +++--- 2 files changed, 78 insertions(+), 37 deletions(-) This could be a *lot* simpler, see attachment. From abd573bffb674a0a7565b18b38be116472fa5f24 Mon Sep 17 00:00:00 2001 From: Francisco Jerez curroje...@riseup.net Date: Mon, 18 Aug 2014 08:30:46 +0300 Subject: [PATCH] clover/util: Have compat::vector track separate size and capacity. In order to make the behaviour of resize() and reserve() closer to the standard. --- src/gallium/state_trackers/clover/core/module.cpp | 4 +- src/gallium/state_trackers/clover/util/compat.hpp | 67 ++- 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/module.cpp b/src/gallium/state_trackers/clover/core/module.cpp index 55ed91a..9ef584b 100644 --- a/src/gallium/state_trackers/clover/core/module.cpp +++ b/src/gallium/state_trackers/clover/core/module.cpp @@ -94,7 +94,7 @@ namespace { static void proc(compat::istream is, compat::vectorT v) { - v.reserve(_procuint32_t(is)); + v.resize(_procuint32_t(is)); for (size_t i = 0; i v.size(); i++) new(v[i]) T(_procT(is)); @@ -122,7 +122,7 @@ namespace { static void proc(compat::istream is, compat::vectorT v) { - v.reserve(_procuint32_t(is)); + v.resize(_procuint32_t(is)); is.read(reinterpret_castchar *(v.begin()), v.size() * sizeof(T)); } diff --git a/src/gallium/state_trackers/clover/util/compat.hpp b/src/gallium/state_trackers/clover/util/compat.hpp index 50e1c7d..a4e3938 100644 --- a/src/gallium/state_trackers/clover/util/compat.hpp +++ b/src/gallium/state_trackers/clover/util/compat.hpp @@ -66,65 +66,81 @@ namespace clover { typedef std::ptrdiff_t difference_type; typedef std::size_t size_type; - vector() : p(NULL), n(0) { + vector() : p(NULL), _size(0), _capacity(0) { } - 
vector(const vector v) : p(alloc(v.n, v.p, v.n)), n(v.n) { + vector(const vector v) : +p(alloc(v._size, v.p, v._size)), +_size(v._size), _capacity(v._size) { } - vector(const_iterator p, size_type n) : p(alloc(n, p, n)), n(n) { + vector(const_iterator p, size_type n) : +p(alloc(n, p, n)), _size(n), _capacity(n) { } templatetypename C vector(const C v) : -p(alloc(v.size(), *v.begin(), v.size())), n(v.size()) { +p(alloc(v.size(), *v.begin(), v.size())), +_size(v.size()) , _capacity(v.size()) { } ~vector() { -free(n, p); +free(_size, p); } vector operator=(const vector v) { -free(n, p); +free(_size, p); -p = alloc(v.n, v.p, v.n); -n = v.n; +p = alloc(v._size, v.p, v._size); +_size = v._size; +_capacity = v._size; return *this; } void - reserve(size_type m) { -if (n m) { - T *q = alloc(m, p, n); - free(n, p); + reserve(size_type n) { +if (_capacity n) { + T *q = alloc(n, p, _size); + free(_size, p); p = q; - n = m; + _capacity = n; } } void - resize(size_type m, T x = T()) { -size_type n = size(); + resize(size_type n, T x = T()) { +if (n = _size) { + for (size_type i = n; i _size; ++i) + p[i].~T(); -reserve(m); +} else { + reserve(n); -for (size_type i = n; i m; ++i) - new(p[i]) T(x); + for (size_type i = _size; i n; ++i) + new(p[i]) T(x); +} + +_size = n; } void push_back(const T x) { -size_type n = size(); -reserve(n + 1); -new(p[n]) T(x); +reserve(_size + 1); +new(p[_size]) T(x); +++_size; } size_type size() const { -return n; +return _size; + } + + size_type + capacity() const { +return _capacity; } iterator @@ -139,12 +155,12 @@ namespace clover { iterator end() { -return p + n; +return p + _size; } const_iterator end() const { -return p + n; +return p + _size; }
Re: [Mesa-dev] [PATCH] clover: fix piglit cl-api-build-program test
EdB edb+m...@sigluy.net writes: On Sunday, August 17, 2014 11:50:12 PM Francisco Jerez wrote: EdB edb+m...@sigluy.net writes: Hello. There is a crash with your version. This one works. Oops, sorry for that. It seems like a hack to me to force the kernel reference count to one to keep it from being destroyed... Can you try the attached patch instead on top of my clover-next branch [1]? 8010325eaf and 47e8adea3a are the ones it depends on. [1] http://cgit.freedesktop.org/~currojerez/mesa/log/?h=clover-next It works. Thanks. Cool, pushed. pgpD_NQunJHEv.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 82538] Super Maryo Chronicles fails with st/mesa assertion failure
https://bugs.freedesktop.org/show_bug.cgi?id=82538 Michel Dänzer mic...@daenzer.net changed: What|Removed |Added CC||mar...@gmail.com --- Comment #3 from Michel Dänzer mic...@daenzer.net --- Bisected it to: commit 734e4946f50c1b83dafdb18ced652abc88e6a246 Author: Marek Olšák marek.ol...@amd.com Date: Fri Jul 11 00:05:44 2014 +0200 mesa: fix crash in st/mesa after deleting a VAO This happens when glGetMultisamplefv (or any other non-draw function) is called, which doesn't invoke the VBO module to update _DrawArrays and the pointer is invalid at that point. However st/mesa still dereferences it to setup vertex buffers == crash. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa
On 16.08.2014 09:12, Connor Abbott wrote: I know what you might be thinking right now. Wait, *another* IR? Don't we already have like 5 of those, not counting all the driver-specific ones? Isn't this stuff complicated enough already? Well, there are some pretty good reasons to start afresh (again...). In the years we've been using GLSL IR, we've come to realize that, in fact, it's not what we want *at all* to do optimizations on. Did you evaluate using LLVM IR instead of inventing yet another one? -- Earthling Michel Dänzer| http://www.amd.com Libre software enthusiast |Mesa and X developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] clover: fix _logs string creation
On Monday, August 18, 2014 09:20:03 AM Francisco Jerez wrote: EdB edb+m...@sigluy.net writes: compact::string is not \0 terminated. size() need to be used for std::string creation --- src/gallium/state_trackers/clover/core/program.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index e09c3aa..3f504d5 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -61,9 +61,9 @@ program::build(const ref_vectordevice devs, const char *opts) { dev.ir_target(), build_opts(dev), log)); _binaries.insert({ dev, module }); -_logs.insert({ dev, std::string(log.c_str()) }); +_logs.insert({ dev, std::string(log.c_str(), log.size()) }); } catch (const build_error ) { -_logs.insert({ dev, std::string(log.c_str()) }); +_logs.insert({ dev, std::string(log.c_str(), log.size()) }); Both of these should just be using the conversion operator. See attachment. Agreed, I was highlighting the problem. Yours is better. Thanks throw; } } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] clover: stdify compat::vector a little more
On Monday, August 18, 2014 09:29:02 AM Francisco Jerez wrote: EdB edb+m...@sigluy.net writes: make resize work like std::vector reserve take advantage of capacity rename members to be uniform with other class --- src/gallium/state_trackers/clover/core/module.cpp | 2 +- src/gallium/state_trackers/clover/util/compat.hpp | 113 +++--- 2 files changed, 78 insertions(+), 37 deletions(-) This could be a *lot* simpler, see attachment. Looks good to me. Thanks ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/7] mapi: Inline shared-glapi/Makefile.
On 18/08/14 05:14, Matt Turner wrote: On Sun, Aug 17, 2014 at 1:06 PM, Kristian Høgsberg hoegsb...@gmail.com wrote: On Fri, Aug 15, 2014 at 10:47:06AM -0700, Matt Turner wrote: --- configure.ac | 1 - src/mapi/Makefile.am | 44 --- src/mapi/shared-glapi/Makefile.am | 34 -- src/mesa/Makefile.sources | 3 --- 4 files changed, 41 insertions(+), 41 deletions(-) delete mode 100644 src/mapi/shared-glapi/Makefile.am diff --git a/configure.ac b/configure.ac index dc81c80..97d5394 100644 --- a/configure.ac +++ b/configure.ac @@ -2243,7 +2243,6 @@ AC_CONFIG_FILES([Makefile src/mapi/glapi/Makefile src/mapi/glapi/gen/Makefile src/mapi/glapi/tests/Makefile - src/mapi/shared-glapi/Makefile src/mapi/shared-glapi/tests/Makefile src/mapi/vgapi/Makefile src/mapi/vgapi/vg.pc diff --git a/src/mapi/Makefile.am b/src/mapi/Makefile.am index ef53803..6b9444a 100644 --- a/src/mapi/Makefile.am +++ b/src/mapi/Makefile.am @@ -1,4 +1,4 @@ -# Copyright © 2013 Intel Corporation +# Copyright © 2013, 2014 Intel Corporation # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the Software), @@ -19,10 +19,46 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. -SUBDIRS = glapi/gen +SUBDIRS = glapi/gen . 
+ +TOP = $(top_srcdir) + +BUILT_SOURCES = +CLEANFILES = $(BUILT_SOURCES) + +lib_LTLIBRARIES = + +AM_CFLAGS = $(PTHREAD_CFLAGS) +AM_CPPFLAGS =\ + $(DEFINES) \ + $(SELINUX_CFLAGS) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/mapi\ + -I$(top_builddir)/src/mapi + +GLAPI = $(top_srcdir)/src/mapi/glapi +include Makefile.sources +include glapi/gen/glapi_gen.mk if HAVE_SHARED_GLAPI -SUBDIRS += shared-glapi +SUBDIRS += shared-glapi/tests + +BUILT_SOURCES += shared-glapi/glapi_mapi_tmp.h + +lib_LTLIBRARIES += shared-glapi/libglapi.la +shared_glapi_libglapi_la_SOURCES = $(MAPI_GLAPI_FILES) +shared_glapi_libglapi_la_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + -DMAPI_MODE_GLAPI \ + -DMAPI_ABI_HEADER=\shared-glapi/glapi_mapi_tmp.h\ +shared_glapi_libglapi_la_LIBADD = $(SELINUX_LIBS) +shared_glapi_libglapi_la_LDFLAGS = \ + -no-undefined \ + $(GC_SECTIONS) \ + $(LD_NO_UNDEFINED) + +shared-glapi/glapi_mapi_tmp.h : $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) + $(call glapi_gen_mapi,$,shared-glapi) endif if HAVE_OPENGL @@ -40,3 +76,5 @@ endif if HAVE_OPENVG SUBDIRS += vgapi endif + +include $(top_srcdir)/install-lib-links.mk diff --git a/src/mapi/shared-glapi/Makefile.am b/src/mapi/shared-glapi/Makefile.am deleted file mode 100644 index 330719c..000 --- a/src/mapi/shared-glapi/Makefile.am +++ /dev/null @@ -1,34 +0,0 @@ -# Used by OpenGL ES or when --enable-shared-glapi is specified - -SUBDIRS = . tests - -TOP = $(top_srcdir) -GLAPI = $(top_srcdir)/src/mapi/glapi -include $(top_srcdir)/src/mapi/Makefile.sources - -lib_LTLIBRARIES = libglapi.la -libglapi_la_SOURCES = $(MAPI_GLAPI_FILES) -libglapi_la_LIBADD = $(PTHREAD_LIBS) $(SELINUX_LIBS) You didn't move $(PTHREAD_LIBS) up to shared_glpai_libglapi_la_LIBADD? Right... Emil, do you remember whether PTHREAD_LIBS is needed? PTHREAD_CFLAGS seems sufficient for me, but I have a vague memory that FreeBSD or something needs PTHREAD_LIBS. This seems to be an interesting topic: ldd states that our current pthreads linking is not needed. 
On the other hand the libglapi.so.0.0 has at least one function (pthread_once) coming from the pthreads library. At the same time the function is _unused_ by the whole of mesa. Not to mention that *BSD people need the pthreads linking as their libc does not provide any pthread* symbols. So in summary, let's keep PTHREAD_LIBS in for now :) -Emil -libglapi_la_LDFLAGS = \ - -no-undefined \ - $(GC_SECTIONS) \ - $(LD_NO_UNDEFINED) - -include $(GLAPI)/gen/glapi_gen.mk -glapi_mapi_tmp.h : $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) - $(call glapi_gen_mapi,$,shared-glapi) - -BUILT_SOURCES = glapi_mapi_tmp.h -CLEANFILES = $(BUILT_SOURCES) - -AM_CFLAGS = $(PTHREAD_CFLAGS) -AM_CPPFLAGS =\ - $(DEFINES) \ - $(SELINUX_CFLAGS) \ - -I$(top_srcdir)/include \ - -I$(top_srcdir)/src/mapi\ - -I$(top_builddir)/src/mapi
Re: [Mesa-dev] [PATCH 03/19] glx/drisw: add support for DRI2rendererQueryExtension
On 14/08/2014 23:18, Emil Velikov wrote: The extension is used by GLX_MESA_query_renderer, which can be provided for by hardware and software drivers. v2: Use designated initializers. v3: Move drisw_query_renderer_*() to dri2_query_renderer.c This breaks my build (see [1]) I guess something like the attached is needed. Possibly dri2_query_renderer.c needs to be renamed, since it's contents now are used for more than dri[23]. [1] http://tinderbox.x.org/builds/2014-08-16-0006/logs/mesa-mesa/#build From ee9b2d044ebb089bc3daf93fc6b71e167c47841f Mon Sep 17 00:00:00 2001 From: Jon TURNEY jon.tur...@dronecode.org.uk Date: Sun, 17 Aug 2014 17:22:22 +0100 Subject: [PATCH] Fix build since 679c2ef glx/drisw: add support for DRI2rendererQueryExtension, when only building drisw renderer. Signed-off-by: Jon TURNEY jon.tur...@dronecode.org.uk --- src/glx/Makefile.am | 6 +++--- src/glx/dri2_query_renderer.c | 4 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am index cdd898e..23cb794 100644 --- a/src/glx/Makefile.am +++ b/src/glx/Makefile.am @@ -96,7 +96,8 @@ endif if HAVE_DRICOMMON libglx_la_SOURCES += \ xfont.c \ - dri_common.c + dri_common.c \ + dri2_query_renderer.c endif if HAVE_DRI2 @@ -104,8 +105,7 @@ libglx_la_SOURCES += \ dri_glx.c \ XF86dri.c \ dri2_glx.c \ - dri2.c \ - dri2_query_renderer.c + dri2.c endif if HAVE_DRI3 diff --git a/src/glx/dri2_query_renderer.c b/src/glx/dri2_query_renderer.c index 247ec1c..6ccd710 100644 --- a/src/glx/dri2_query_renderer.c +++ b/src/glx/dri2_query_renderer.c @@ -25,7 +25,9 @@ #include glxclient.h #include glx_error.h +#ifdef HAVE_LIBDRM #include dri2.h +#endif #include dri_interface.h #include dri2_priv.h #if defined(HAVE_DRI3) @@ -66,6 +68,7 @@ dri2_convert_glx_query_renderer_attribs(int attribute) return -1; } +#ifdef HAVE_LIBDRM _X_HIDDEN int dri2_query_renderer_integer(struct glx_screen *base, int attribute, unsigned int *value) @@ -103,6 +106,7 @@ 
dri2_query_renderer_string(struct glx_screen *base, int attribute, return psc-rendererQuery-queryString(psc-driScreen, dri_attribute, value); } +#endif #if defined(HAVE_DRI3) _X_HIDDEN int -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] build: Let install-lib-links.mk handle .la files in subdirectories.
On 18/08/14 05:19, Matt Turner wrote: On Sun, Aug 17, 2014 at 2:39 PM, Emil Velikov emil.l.veli...@gmail.com wrote: On 15/08/14 18:47, Matt Turner wrote: The next patches are going to combine some of the mapi subdirectories' Makefiles into a single Makefile, giving better build parallelism. Hi Matt, I must admit that while I like this patch, I'm not at all a fan of the rest of the series. But I won't object too strongly against the idea. Oh, really? I mean, there's some complexity just in all of the combinations, but I think this is a clean up. It's certainly an improvement in that we don't have Makefiles that build a single source file. After this series if you build GL, ES1, and ES2 all of it happens in parallel including the tests. I shall not be going into mapi anytime soon so it's up to you to have fun in there. I prefer to get gallium's 'make dist' close to working and clean up some of the pipe-loader/targets mess that I've created :P Not sure if the extra parallelism will help here as I very rarely build ES* anyway so ;) -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 03/19] glx/drisw: add support for DRI2rendererQueryExtension
On 18/08/14 12:47, Jon TURNEY wrote: On 14/08/2014 23:18, Emil Velikov wrote: The extension is used by GLX_MESA_query_renderer, which can be provided for by hardware and software drivers. v2: Use designated initializers. v3: Move drisw_query_renderer_*() to dri2_query_renderer.c This breaks my build (see [1]) Ouch, I've completely forgot about your recent-ish changes in here. Sorry for the breakage. I guess something like the attached is needed. Possibly dri2_query_renderer.c needs to be renamed, since it's contents now are used for more than dri[23]. My initial plan was to move the functions to dri_common.c, although that caused 'make check' to explode so I've kept them here as per Ian's suggestion. Renaming the file makes sense imho. [1] http://tinderbox.x.org/builds/2014-08-16-0006/logs/mesa-mesa/#build 0001-Fix-build-since-679c2ef-glx-drisw-add-support-for-DR.patch From ee9b2d044ebb089bc3daf93fc6b71e167c47841f Mon Sep 17 00:00:00 2001 From: Jon TURNEY jon.tur...@dronecode.org.uk Date: Sun, 17 Aug 2014 17:22:22 +0100 Subject: [PATCH] Fix build since 679c2ef glx/drisw: add support for DRI2rendererQueryExtension, when only building drisw renderer. 
Signed-off-by: Jon TURNEY jon.tur...@dronecode.org.uk --- src/glx/Makefile.am | 6 +++--- src/glx/dri2_query_renderer.c | 4 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am index cdd898e..23cb794 100644 --- a/src/glx/Makefile.am +++ b/src/glx/Makefile.am @@ -96,7 +96,8 @@ endif if HAVE_DRICOMMON libglx_la_SOURCES += \ xfont.c \ - dri_common.c + dri_common.c \ + dri2_query_renderer.c endif if HAVE_DRI2 @@ -104,8 +105,7 @@ libglx_la_SOURCES += \ dri_glx.c \ XF86dri.c \ dri2_glx.c \ - dri2.c \ - dri2_query_renderer.c + dri2.c endif if HAVE_DRI3 diff --git a/src/glx/dri2_query_renderer.c b/src/glx/dri2_query_renderer.c index 247ec1c..6ccd710 100644 --- a/src/glx/dri2_query_renderer.c +++ b/src/glx/dri2_query_renderer.c @@ -25,7 +25,9 @@ #include glxclient.h #include glx_error.h +#ifdef HAVE_LIBDRM #include dri2.h +#endif With a couple of small changes, I believe that you should be safe with dropping the above header and the HAVE_LIBDRM guards below. The small changes: - dri*_query_renderer_* into their respective dri*_priv.h - Perhaps move a struct from dri2.h to dri2_priv.h -Emil #include dri_interface.h #include dri2_priv.h #if defined(HAVE_DRI3) @@ -66,6 +68,7 @@ dri2_convert_glx_query_renderer_attribs(int attribute) return -1; } +#ifdef HAVE_LIBDRM _X_HIDDEN int dri2_query_renderer_integer(struct glx_screen *base, int attribute, unsigned int *value) @@ -103,6 +106,7 @@ dri2_query_renderer_string(struct glx_screen *base, int attribute, return psc-rendererQuery-queryString(psc-driScreen, dri_attribute, value); } +#endif #if defined(HAVE_DRI3) _X_HIDDEN int -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Clamp/saturate optimizations v3
v3 of clamp and saturate optimizations Changes since v1: - Only remove the old try_emit_saturate operations after the new optimizations are in place. (Matt, Ian) - Output [min/max](saturate(x),b) instead of saturate([min/max](x,b)) as suggested by Ilia Mirkin. - The change above required some refactoring in the fs/vec4 backend to allow propagation of certain instructions with saturate flag to SEL. For other instructions, we don't propagate saturate instructions, similar to the previous behaviour. Since v2: - Fix comments to reflect we are doing a commutative operation, add missing conditions when optimizing clamp in opt_algebraic pass. - Refactor try_emit_saturate() in i965/fs instead of completely removing it. This fixed a regression where the changes emitted an (extra) unnecessary saturated mov when the expression generating src can do saturate directly instead. - Fix regression in the i965/vec4 copy-propagate optimization caused by ignoring channels in the propagated instruction. - Count generated loops from the fs/vec4 generator. Results from our shader-db: total instructions in shared programs: 4538627 -> 4560104 (0.47%) instructions in affected programs: 45144 -> 66621 (47.57%) total loops in shared programs:887 -> 711 (-19.84%) GAINED:0 LOST: 36 I modified shader-db a bit to catch loop unrolls. The shaders that show an increase in instruction count are all due to the loop unroll pass triggered by this optimization on games that contain looped clamp/saturate operations. The unroll pass also resulted in a few shaders with looped clamp/sat skipping SIMD16 generation. 
** No piglit regressions observed ** Abdiel Janulgue (17): i965/vec4/fs: Count loops in shader debug glsl: Add ir_unop_saturate glsl: Add constant evaluation of ir_unop_saturate glsl: Add a pass to lower ir_unop_saturate to clamp(x, 0, 1) ir_to_mesa, glsl_to_tgsi: lower ir_unop_saturate ir_to_mesa, glsl_to_tgsi: Add support for ir_unop_saturate i965/fs: Add support for ir_unop_saturate i965/vec4: Add support for ir_unop_saturate glsl: Implement saturate as ir_unop_saturate glsl: Optimize clamp(x, 0, 1) as saturate(x) glsl: Optimize clamp(x, 0.0, b), where b 1.0 as min(saturate(x),b) glsl: Optimize clamp(x, b, 1.0), where b 0.0 as max(saturate(x),b) i965/fs: Allow propagation of instructions with saturate flag to sel i965/vec4: Allow propagation of instructions with saturate flag to sel ir_to_mesa, glsl_to_tgsi: Remove try_emit_saturate i965/fs: Refactor try_emit_saturate i965/vec4: Remove try_emit_saturate src/glsl/ir.cpp | 2 + src/glsl/ir.h| 1 + src/glsl/ir_builder.cpp | 6 +- src/glsl/ir_constant_expression.cpp | 6 ++ src/glsl/ir_optimization.h | 1 + src/glsl/ir_validate.cpp | 1 + src/glsl/lower_instructions.cpp | 29 src/glsl/opt_algebraic.cpp | 98 ++ src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 1 + src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp| 18 - src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 6 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 27 --- src/mesa/drivers/dri/i965/brw_vec4.h | 2 +- src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp | 85 +++--- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 6 +- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 25 ++- src/mesa/program/ir_to_mesa.cpp | 59 +++- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 63 +++-- 18 files changed, 261 insertions(+), 175 deletions(-) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 02/17] glsl: Add ir_unop_saturate
Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/glsl/ir.cpp | 2 ++ src/glsl/ir.h| 1 + src/glsl/ir_validate.cpp | 1 + 3 files changed, 4 insertions(+) diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 4a4d304..ef04ed0 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -255,6 +255,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) case ir_unop_dFdy_fine: case ir_unop_bitfield_reverse: case ir_unop_interpolate_at_centroid: + case ir_unop_saturate: this-type = op0-type; break; @@ -534,6 +535,7 @@ static const char *const operator_strs[] = { bit_count, find_msb, find_lsb, + sat, noise, interpolate_at_centroid, +, diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 18623b9..96c8b0e 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -1248,6 +1248,7 @@ enum ir_expression_operation { ir_unop_find_lsb, /*@}*/ + ir_unop_saturate, ir_unop_noise, /** diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp index 5b20677..97a581d 100644 --- a/src/glsl/ir_validate.cpp +++ b/src/glsl/ir_validate.cpp @@ -241,6 +241,7 @@ ir_validate::visit_leave(ir_expression *ir) case ir_unop_log: case ir_unop_exp2: case ir_unop_log2: + case ir_unop_saturate: assert(ir-operands[0]-type-base_type == GLSL_TYPE_FLOAT); assert(ir-type == ir-operands[0]-type); break; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/17] ir_to_mesa, glsl_to_tgsi: lower ir_unop_saturate
Needed when vertex programs doesn't allow saturate Reviewed-by: Matt Turner matts...@gmail.com Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/mesa/program/ir_to_mesa.cpp| 5 - src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 011ffed..e8126b3 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2991,9 +2991,12 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) /* Lowering */ do_mat_op_to_vec(ir); +GLenum target = _mesa_shader_stage_to_program(prog-_LinkedShaders[i]-Stage); lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP -| ((options-EmitNoPow) ? POW_TO_EXP2 : 0))); +| ((options-EmitNoPow) ? POW_TO_EXP2 : 0) +| ((target == GL_VERTEX_PROGRAM_ARB) ? SAT_TO_CLAMP +: 0))); progress = do_lower_jumps(ir, true, true, options-EmitNoMainReturn, options-EmitNoCont, options-EmitNoLoops) || progress; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 84bdc4f..575da1e 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5429,6 +5429,9 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) if (!pscreen-get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS)) lower_offset_arrays(ir); do_mat_op_to_vec(ir); + /* Emit saturates in the vertex shader only if SM 3.0 is supported. */ + bool vs_sm3 = (_mesa_shader_stage_to_program(prog-_LinkedShaders[i]-Stage) == + GL_VERTEX_PROGRAM_ARB) st_context(ctx)-has_shader_model3; lower_instructions(ir, MOD_TO_FRACT | DIV_TO_MUL_RCP | @@ -5438,7 +5441,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) CARRY_TO_ARITH | BORROW_TO_ARITH | (options-EmitNoPow ? POW_TO_EXP2 : 0) | - (!ctx-Const.NativeIntegers ? 
INT_DIV_TO_MUL_RCP : 0)); + (!ctx-Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) | + (vs_sm3 ? SAT_TO_CLAMP : 0)); lower_ubo_reference(prog-_LinkedShaders[i], ir); do_vec_index_to_cond_assign(ir); -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/17] i965/fs: Add support for ir_unop_saturate
Reviewed-by: Matt Turner matts...@gmail.com Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 1 + src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 2 files changed, 5 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index d98b7eb..cb0a079 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -246,6 +246,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_bit_count: case ir_unop_find_msb: case ir_unop_find_lsb: + case ir_unop_saturate: for (i = 0; i vector_elements; i++) { ir_rvalue *op0 = get_element(op_var[0], i); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 05082ee..c33c46b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -854,6 +854,10 @@ fs_visitor::visit(ir_expression *ir) case ir_unop_find_lsb: emit(FBL(this-result, op[0])); break; + case ir_unop_saturate: + inst = emit(MOV(this-result, op[0])); + inst-saturate = true; + break; case ir_triop_bitfield_extract: /* Note that the instruction's argument order is reversed from GLSL * and the IR. -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/17] glsl: Add a pass to lower ir_unop_saturate to clamp(x, 0, 1)
Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/glsl/ir_optimization.h | 1 + src/glsl/lower_instructions.cpp | 29 + 2 files changed, 30 insertions(+) diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index b83c225..1c6f72b 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -40,6 +40,7 @@ #define LDEXP_TO_ARITH 0x100 #define CARRY_TO_ARITH 0x200 #define BORROW_TO_ARITH0x400 +#define SAT_TO_CLAMP 0x800 /** * \see class lower_packing_builtins_visitor diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index 176070c..6842853 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -41,6 +41,7 @@ * - BITFIELD_INSERT_TO_BFM_BFI * - CARRY_TO_ARITH * - BORROW_TO_ARITH + * - SAT_TO_CLAMP * * SUB_TO_ADD_NEG: * --- @@ -104,6 +105,10 @@ * * Converts ir_borrow into (x y). * + * SAT_TO_CLAMP: + * - + * Converts ir_unop_saturate into min(max(x, 0.0), 1.0) + * */ #include main/core.h /* for M_LOG2E */ @@ -139,6 +144,7 @@ private: void ldexp_to_arith(ir_expression *); void carry_to_arith(ir_expression *); void borrow_to_arith(ir_expression *); + void sat_to_clamp(ir_expression *); }; } /* anonymous namespace */ @@ -484,6 +490,24 @@ lower_instructions_visitor::borrow_to_arith(ir_expression *ir) this-progress = true; } +void +lower_instructions_visitor::sat_to_clamp(ir_expression *ir) +{ + /* Translates +* ir_unop_saturate x +* into +* ir_binop_min (ir_binop_max(x, 0.0), 1.0) +*/ + + ir-operation = ir_binop_min; + ir-operands[0] = new(ir) ir_expression(ir_binop_max, ir-operands[0]-type, + ir-operands[0], + new(ir) ir_constant(0.0f)); + ir-operands[1] = new(ir) ir_constant(1.0f); + + this-progress = true; +} + ir_visitor_status lower_instructions_visitor::visit_leave(ir_expression *ir) { @@ -540,6 +564,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) borrow_to_arith(ir); break; + case ir_unop_saturate: + if (lowering(SAT_TO_CLAMP)) + 
sat_to_clamp(ir); + break; + default: return visit_continue; } -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 14/17] i965/vec4: Allow propagation of instructions with saturate flag to sel
When sel conditon is bounded within 0 and 1.0. This allows code as: mov.sat a b sel.ge dst a 0.25F To be propagated as: sel.ge.sat dst b 0.25F v3: - Syntax clarifications in inst-saturate assignment - Remove extra parenthesis when assigning src_reg value from copy_entry (Matt Turner) v4: - Take channels into consideration when propagating saturated instructions. Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 85 +++--- 1 file changed, 58 insertions(+), 27 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp index 37ca661..fe47b0f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp @@ -36,13 +36,17 @@ extern C { namespace brw { +struct copy_entry { + src_reg *value[4]; + int saturatemask; +}; + static bool is_direct_copy(vec4_instruction *inst) { return (inst-opcode == BRW_OPCODE_MOV !inst-predicate inst-dst.file == GRF - !inst-saturate !inst-dst.reladdr !inst-src[0].reladdr inst-dst.type == inst-src[0].type); @@ -74,16 +78,16 @@ is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch) static bool try_constant_propagate(struct brw_context *brw, vec4_instruction *inst, - int arg, src_reg *values[4]) + int arg, struct copy_entry *entry) { /* For constant propagation, we only handle the same constant * across all 4 channels. Some day, we should handle the 8-bit * float vector format, which would let us constant propagate * vectors better. 
*/ - src_reg value = *values[0]; + src_reg value = *entry-value[0]; for (int i = 1; i 4; i++) { - if (!value.equals(*values[i])) + if (!value.equals(*entry-value[i])) return false; } @@ -213,22 +217,22 @@ is_logic_op(enum opcode opcode) static bool try_copy_propagate(struct brw_context *brw, vec4_instruction *inst, - int arg, src_reg *values[4]) + int arg, struct copy_entry *entry, int reg) { /* For constant propagation, we only handle the same constant * across all 4 channels. Some day, we should handle the 8-bit * float vector format, which would let us constant propagate * vectors better. */ - src_reg value = *values[0]; + src_reg value = *entry-value[0]; for (int i = 1; i 4; i++) { /* This is equals() except we don't care about the swizzle. */ - if (value.file != values[i]-file || - value.reg != values[i]-reg || - value.reg_offset != values[i]-reg_offset || - value.type != values[i]-type || - value.negate != values[i]-negate || - value.abs != values[i]-abs) { + if (value.file != entry-value[i]-file || + value.reg != entry-value[i]-reg || + value.reg_offset != entry-value[i]-reg_offset || + value.type != entry-value[i]-type || + value.negate != entry-value[i]-negate || + value.abs != entry-value[i]-abs) { return false; } } @@ -239,7 +243,7 @@ try_copy_propagate(struct brw_context *brw, vec4_instruction *inst, */ int s[4]; for (int i = 0; i 4; i++) { - s[i] = BRW_GET_SWZ(values[i]-swizzle, + s[i] = BRW_GET_SWZ(entry-value[i]-swizzle, BRW_GET_SWZ(inst-src[arg].swizzle, i)); } value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]); @@ -300,6 +304,25 @@ try_copy_propagate(struct brw_context *brw, vec4_instruction *inst, if (value.equals(inst-src[arg])) return false; + /* Limit saturate propagation only to SEL with src1 bounded within 1.0 and 1.0 +* otherwise, skip copy propagate altogether +*/ + if (entry-saturatemask (1 arg)) { + switch(inst-opcode) { + case BRW_OPCODE_SEL: + if (inst-src[1].file != IMM || + inst-src[1].fixed_hw_reg.dw1.f 0.0 || + 
inst-src[1].fixed_hw_reg.dw1.f 1.0) { +return false; + } + if (!inst-saturate) +inst-saturate = true; + break; + default: + return false; + } + } + value.type = inst-src[arg].type; inst-src[arg] = value; return true; @@ -309,9 +332,9 @@ bool vec4_visitor::opt_copy_propagation() { bool progress = false; - src_reg *cur_value[virtual_grf_reg_count][4]; + struct copy_entry entries[virtual_grf_reg_count]; - memset(cur_value, 0, sizeof(cur_value)); + memset(entries, 0, sizeof(entries)); foreach_in_list(vec4_instruction, inst, instructions) { /* This pass only works on basic blocks. If there's flow @@ -322,7 +345,7 @@ vec4_visitor::opt_copy_propagation() * src/glsl/opt_copy_propagation.cpp to track available
[Mesa-dev] [PATCH 17/17] i965/vec4: Remove try_emit_saturate
Now that saturate is implemented natively as an instruction, we can cut down on unneeded functionality. Reviewed-by: Matt Turner matts...@gmail.com Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/mesa/drivers/dri/i965/brw_vec4.h | 1 - src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 21 - 2 files changed, 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index c333baa..e5ad7af 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -563,7 +563,6 @@ public: src_reg orig_src, int base_offset); - bool try_emit_sat(ir_expression *ir); bool try_emit_mad(ir_expression *ir); bool try_emit_b2f_of_compare(ir_expression *ir); void resolve_ud_negate(src_reg *reg); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 2e7a85d..95d46c2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1078,24 +1078,6 @@ vec4_visitor::visit(ir_function *ir) } bool -vec4_visitor::try_emit_sat(ir_expression *ir) -{ - ir_rvalue *sat_src = ir-as_rvalue_to_saturate(); - if (!sat_src) - return false; - - sat_src-accept(this); - src_reg src = this-result; - - this-result = src_reg(this, ir-type); - vec4_instruction *inst; - inst = emit(MOV(dst_reg(this-result), src)); - inst-saturate = true; - - return true; -} - -bool vec4_visitor::try_emit_mad(ir_expression *ir) { /* 3-src instructions were introduced in gen6. */ @@ -1228,9 +1210,6 @@ vec4_visitor::visit(ir_expression *ir) dst_reg result_dst; vec4_instruction *inst; - if (try_emit_sat(ir)) - return; - if (ir-operation == ir_binop_add) { if (try_emit_mad(ir)) return; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 13/17] i965/fs: Allow propagation of instructions with saturate flag to sel
When sel conditon is bounded within 0 and 1.0. This allows code as: mov.sat a b sel.ge dst a 0.25F To be propagated as: sel.ge.sat dst b 0.25F v3: Syntax clarifications in inst-saturate assignment (Matt Turner) Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 18 +- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 09f51bc..7e4eab7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -43,6 +43,7 @@ struct acp_entry : public exec_node { fs_reg dst; fs_reg src; enum opcode opcode; + bool saturate; }; struct block_data { @@ -347,11 +348,26 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) return false; } + if (entry-saturate) { + switch(inst-opcode) { + case BRW_OPCODE_SEL: + if (inst-src[1].file != IMM || + inst-src[1].fixed_hw_reg.dw1.f 0.0 || + inst-src[1].fixed_hw_reg.dw1.f 1.0) { +return false; + } + break; + default: + return false; + } + } + inst-src[arg].file = entry-src.file; inst-src[arg].reg = entry-src.reg; inst-src[arg].reg_offset = entry-src.reg_offset; inst-src[arg].subreg_offset = entry-src.subreg_offset; inst-src[arg].stride *= entry-src.stride; + inst-saturate = inst-saturate || entry-saturate; if (!inst-src[arg].abs) { inst-src[arg].abs = entry-src.abs; @@ -514,7 +530,6 @@ can_propagate_from(fs_inst *inst) inst-src[0].file == UNIFORM || inst-src[0].file == IMM) inst-src[0].type == inst-dst.type - !inst-saturate !inst-is_partial_write()); } @@ -569,6 +584,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, entry-dst = inst-dst; entry-src = inst-src[0]; entry-opcode = inst-opcode; + entry-saturate = inst-saturate; acp[entry-dst.reg % ACP_HASH_SIZE].push_tail(entry); } else if (inst-opcode == SHADER_OPCODE_LOAD_PAYLOAD 
inst-dst.file == GRF) { -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 01/17] i965/vec4/fs: Count loops in shader debug
Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 6 -- src/mesa/drivers/dri/i965/brw_vec4.h | 1 + src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 6 -- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index a243003..c4e6c6d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1483,6 +1483,7 @@ void fs_generator::generate_code(exec_list *instructions) { int start_offset = p-next_insn_offset; + int loop_count = 0; struct annotation_info annotation; memset(annotation, 0, sizeof(annotation)); @@ -1743,6 +1744,7 @@ fs_generator::generate_code(exec_list *instructions) case BRW_OPCODE_WHILE: brw_WHILE(p); + loop_count++; break; case SHADER_OPCODE_RCP: @@ -1970,9 +1972,9 @@ fs_generator::generate_code(exec_list *instructions) fprintf(stderr, Native code for blorp program (SIMD%d dispatch):\n, dispatch_width); } - fprintf(stderr, SIMD%d shader: %d instructions. Compacted %d to %d + fprintf(stderr, SIMD%d shader: %d instructions. %d loops. Compacted %d to %d bytes (%.0f%%)\n, - dispatch_width, before_size / 16, before_size, after_size, + dispatch_width, before_size / 16, loop_count, before_size, after_size, 100.0f * (before_size - after_size) / before_size); const struct gl_program *prog = fp ? 
fp-Base : NULL; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index c59d24f..c333baa 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -699,6 +699,7 @@ private: void *mem_ctx; const bool debug_flag; + int loop_count; }; } /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 1b1e647..b8948c3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1188,6 +1188,7 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, case BRW_OPCODE_WHILE: brw_WHILE(p); + loop_count++; break; case SHADER_OPCODE_RCP: @@ -1318,6 +1319,7 @@ vec4_generator::generate_code(exec_list *instructions) { struct annotation_info annotation; memset(annotation, 0, sizeof(annotation)); + loop_count = 0; cfg_t *cfg = NULL; if (unlikely(debug_flag)) @@ -1372,9 +1374,9 @@ vec4_generator::generate_code(exec_list *instructions) } else { fprintf(stderr, Native code for vertex program %d:\n, prog-Id); } - fprintf(stderr, vec4 shader: %d instructions. Compacted %d to %d + fprintf(stderr, vec4 shader: %d instructions. %d loops. Compacted %d to %d bytes (%.0f%%)\n, - before_size / 16, before_size, after_size, + before_size / 16, loop_count, before_size, after_size, 100.0f * (before_size - after_size) / before_size); dump_assembly(p-store, annotation.ann_count, annotation.ann, brw, prog); -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 09/17] glsl: Implement saturate as ir_unop_saturate
Now that we have the ir_unop_saturate implemented as a single instruction, generate the correct simplified expression. Reviewed-by: Matt Turner matts...@gmail.com Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/glsl/ir_builder.cpp | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/glsl/ir_builder.cpp b/src/glsl/ir_builder.cpp index f039414..a2f6f29 100644 --- a/src/glsl/ir_builder.cpp +++ b/src/glsl/ir_builder.cpp @@ -271,11 +271,7 @@ clamp(operand a, operand b, operand c) ir_expression * saturate(operand a) { - void *mem_ctx = ralloc_parent(a.val); - - return expr(ir_binop_max, - expr(ir_binop_min, a, new(mem_ctx) ir_constant(1.0f)), - new(mem_ctx) ir_constant(0.0f)); + return expr(ir_unop_saturate, a); } ir_expression * -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 11/17] glsl: Optimize clamp(x, 0.0, b), where b < 1.0 as min(saturate(x), b)
v2: - Output min(saturate(x),b) instead of saturate(min(x,b)) suggested by Ilia Mirkin - Make sure we do component-wise comparison for vectors (Ian Romanick) v3: - Add missing condition where the outer constant value is zero and inner constant is 1 - Fix comments to reflect we are doing a commutative operation (Matt Turner) Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/glsl/opt_algebraic.cpp | 39 +++ 1 file changed, 39 insertions(+) diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index 4b052933..6dfb681 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -110,6 +110,33 @@ is_vec_basis(ir_constant *ir) return (ir == NULL) ? false : ir-is_basis(); } +static inline bool +is_valid_vec_const(ir_constant *ir) +{ + if (ir == NULL) + return false; + + if (!ir-type-is_scalar() !ir-type-is_vector()) + return false; + + return true; +} + +static inline bool +is_less_than_one(ir_constant *ir) +{ + if (!is_valid_vec_const(ir)) + return false; + + unsigned component = 0; + for (int c = 0; c ir-type-vector_elements; c++) { + if (ir-get_float_component(c) 1.0f) + component++; + } + + return (component == ir-type-vector_elements); +} + static void update_type(ir_expression *ir) { @@ -645,6 +672,18 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) if ((outer_const-is_one() inner_val_a-is_zero()) || (inner_val_a-is_one() outer_const-is_zero())) return saturate(inner_val_b); + +/* Found a {min|max} ({max|min} (x, 0.0), b) where b 1.0 + * and its variations + */ +if (is_less_than_one(outer_const) inner_val_b-is_zero()) + return expr(ir_binop_min, saturate(inner_val_a), outer_const); + +if (!inner_val_b-as_constant()) + continue; + +if (is_less_than_one(inner_val_b-as_constant()) outer_const-is_zero()) + return expr(ir_binop_min, saturate(inner_val_a), inner_val_b); } } -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/17] ir_to_mesa, glsl_to_tgsi: Add support for ir_unop_saturate
Reviewed-by: Matt Turner matts...@gmail.com Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/mesa/program/ir_to_mesa.cpp| 6 ++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 ++ 2 files changed, 12 insertions(+) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index e8126b3..f212aed 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1171,6 +1171,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir) emit(ir, OPCODE_DDY, result_dst, op[0]); break; + case ir_unop_saturate: { + ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV, + result_dst, op[0]); + inst-saturate = true; + break; + } case ir_unop_noise: { const enum prog_opcode opcode = prog_opcode(OPCODE_NOISE1 diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 575da1e..55b9940 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1460,6 +1460,12 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_cos_reduced: emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); break; + case ir_unop_saturate: { + glsl_to_tgsi_instruction *inst; + inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]); + inst-saturate = true; + break; + } case ir_unop_dFdx: case ir_unop_dFdx_coarse: -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/17] glsl: Optimize clamp(x, 0, 1) as saturate(x)
v2: - Check that the base type is float (Ian Romanick) v3: - Make sure comments reflect that we are doing a commutative operation - Add missing condition where the inner constant is 1.0 and outer constant is 0.0 - Make indexing of operands easier to read (Matt Turner) Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/glsl/opt_algebraic.cpp | 36 1 file changed, 36 insertions(+) diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index ac7514a..4b052933 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -614,6 +614,42 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) break; + case ir_binop_min: + case ir_binop_max: + if (ir-type-base_type != GLSL_TYPE_FLOAT) + break; + + /* Replace min(max) operations and its commutative combinations with + * a saturate operation + */ + for (int op = 0; op 2; op++) { + ir_expression *minmax = op_expr[op]; + ir_constant *outer_const = op_const[1 - op]; + ir_expression_operation op_cond = (ir-operation == ir_binop_max) ? +ir_binop_min : ir_binop_max; + + if (!minmax || !outer_const || (minmax-operation != op_cond)) +continue; + + /* Found a min(max) combination. Now try to see if its operands + * meet our conditions that we can do just a single saturate operation + */ + for (int minmax_op = 0; minmax_op 2; minmax_op++) { +ir_rvalue *inner_val_a = minmax-operands[minmax_op]; +ir_rvalue *inner_val_b = minmax-operands[1 - minmax_op]; + +if (!inner_val_a || !inner_val_b) + continue; + +/* Found a {min|max} ({max|min} (x, 0.0), 1.0) operation and its variations */ +if ((outer_const-is_one() inner_val_a-is_zero()) || +(inner_val_a-is_one() outer_const-is_zero())) + return saturate(inner_val_b); + } + } + + break; + case ir_unop_rcp: if (op_expr[0] op_expr[0]-operation == ir_unop_rcp) return op_expr[0]-operands[0]; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 03/17] glsl: Add constant evaluation of ir_unop_saturate
v2: Use CLAMP macro (Ian Romanick) Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/glsl/ir_constant_expression.cpp | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp index 9606021..1e8b3a3 100644 --- a/src/glsl/ir_constant_expression.cpp +++ b/src/glsl/ir_constant_expression.cpp @@ -1469,6 +1469,12 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) } break; + case ir_unop_saturate: + for (unsigned c = 0; c components; c++) { + data.f[c] = CLAMP(op[0]-value.f[c], 0.0f, 1.0f); + } + break; + case ir_triop_bitfield_extract: { int offset = op[1]-value.i[0]; int bits = op[2]-value.i[0]; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 12/17] glsl: Optimize clamp(x, b, 1.0), where b > 0.0 as max(saturate(x), b)
v2: - Output max(saturate(x),b) instead of saturate(max(x,b)) - Make sure we do component-wise comparison for vectors (Ian Romanick) v3: - Add missing condition where the outer constant value is 0.0 and inner constant is 1.0. - Fix comments to show that the optimization is a commutative operation (Matt Turner) Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/glsl/opt_algebraic.cpp | 23 +++ 1 file changed, 23 insertions(+) diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index 6dfb681..447618f 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -137,6 +137,21 @@ is_less_than_one(ir_constant *ir) return (component == ir-type-vector_elements); } +static inline bool +is_greater_than_zero(ir_constant *ir) +{ + if (!is_valid_vec_const(ir)) + return false; + + unsigned component = 0; + for (int c = 0; c ir-type-vector_elements; c++) { + if (ir-get_float_component(c) 0.0f) + component++; + } + + return (component == ir-type-vector_elements); +} + static void update_type(ir_expression *ir) { @@ -684,6 +699,14 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) if (is_less_than_one(inner_val_b-as_constant()) outer_const-is_zero()) return expr(ir_binop_min, saturate(inner_val_a), inner_val_b); + +/* Found a {min|max} ({max|min} (x, b), 1.0), where b 0.0 + * and its variations + */ +if (outer_const-is_one() is_greater_than_zero(inner_val_b-as_constant())) + return expr(ir_binop_max, saturate(inner_val_a), inner_val_b); +if (inner_val_b-as_constant()-is_one() is_greater_than_zero(outer_const)) + return expr(ir_binop_max, saturate(inner_val_a), outer_const); } } -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 16/17] i965/fs: Refactor try_emit_saturate
v3: Since the fs backend can emit saturate as a separate instruction, there is no need to detect for min/max instructions and to rewrite the instruction tree accordingly. On the other hand, we don't need to emit a separate saturated mov either when the expression generating src can do saturate directly. Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 23 --- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index c33c46b..aeb076a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -267,17 +267,14 @@ fs_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg d } } -/* Instruction selection: Produce a MOV.sat instead of - * MIN(MAX(val, 0), 1) when possible. - */ bool fs_visitor::try_emit_saturate(ir_expression *ir) { - ir_rvalue *sat_val = ir-as_rvalue_to_saturate(); - - if (!sat_val) + if (ir-operation != ir_unop_saturate) return false; + ir_rvalue *sat_val = ir-operands[0]; + fs_inst *pre_inst = (fs_inst *) this-instructions.get_tail(); sat_val-accept(this); @@ -285,21 +282,17 @@ fs_visitor::try_emit_saturate(ir_expression *ir) fs_inst *last_inst = (fs_inst *) this-instructions.get_tail(); - /* If the last instruction from our accept() didn't generate our -* src, generate a saturated MOV + /* If the last instruction from our accept() generated our +* src, just set the saturate flag instead of emmitting a separate mov. 
*/ fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src); - if (!modify || modify-regs_written != 1) { - this-result = fs_reg(this, ir-type); - fs_inst *inst = emit(MOV(this-result, src)); - inst-saturate = true; - } else { + if (modify modify-regs_written == 1) { modify-saturate = true; this-result = src; + return true; } - - return true; + return false; } bool -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/17] i965/vec4: Add support for ir_unop_saturate
Reviewed-by: Matt Turner matts...@gmail.com Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 1 file changed, 4 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f22d38d..2e7a85d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1389,6 +1389,10 @@ vec4_visitor::visit(ir_expression *ir) case ir_unop_find_lsb: emit(FBL(result_dst, op[0])); break; + case ir_unop_saturate: + inst = emit(MOV(result_dst, op[0])); + inst-saturate = true; + break; case ir_unop_noise: unreachable(not reached: should be handled by lower_noise); -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 15/17] ir_to_mesa, glsl_to_tgsi: Remove try_emit_saturate
Now that saturate is implemented natively as instruction, we can cut down on unneeded functionality. Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com --- src/mesa/program/ir_to_mesa.cpp| 48 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 51 -- 2 files changed, 99 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index f212aed..325946f 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -311,7 +311,6 @@ public: int mul_operand); bool try_emit_mad_for_and_not(ir_expression *ir, int mul_operand); - bool try_emit_sat(ir_expression *ir); void emit_swz(ir_expression *ir); @@ -866,50 +865,6 @@ ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) return true; } -bool -ir_to_mesa_visitor::try_emit_sat(ir_expression *ir) -{ - /* Saturates were only introduced to vertex programs in -* NV_vertex_program3, so don't give them to drivers in the VP. -*/ - if (this-prog-Target == GL_VERTEX_PROGRAM_ARB) - return false; - - ir_rvalue *sat_src = ir-as_rvalue_to_saturate(); - if (!sat_src) - return false; - - sat_src-accept(this); - src_reg src = this-result; - - /* If we generated an expression instruction into a temporary in -* processing the saturate's operand, apply the saturate to that -* instruction. Otherwise, generate a MOV to do the saturate. -* -* Note that we have to be careful to only do this optimization if -* the instruction in question was what generated src-result. For -* example, ir_dereference_array might generate a MUL instruction -* to create the reladdr, and return us a src reg using that -* reladdr. That MUL result is not the value we're trying to -* saturate. 
-*/ - ir_expression *sat_src_expr = sat_src-as_expression(); - ir_to_mesa_instruction *new_inst; - new_inst = (ir_to_mesa_instruction *)this-instructions.get_tail(); - if (sat_src_expr (sat_src_expr-operation == ir_binop_mul || - sat_src_expr-operation == ir_binop_add || - sat_src_expr-operation == ir_binop_dot)) { - new_inst-saturate = true; - } else { - this-result = get_temp(ir-type); - ir_to_mesa_instruction *inst; - inst = emit(ir, OPCODE_MOV, dst_reg(this-result), src); - inst-saturate = true; - } - - return true; -} - void ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr) @@ -1072,9 +1027,6 @@ ir_to_mesa_visitor::visit(ir_expression *ir) return; } - if (try_emit_sat(ir)) - return; - if (ir-operation == ir_quadop_vector) { this-emit_swz(ir); return; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 55b9940..2946286 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -446,7 +446,6 @@ public: int mul_operand); bool try_emit_mad_for_and_not(ir_expression *ir, int mul_operand); - bool try_emit_sat(ir_expression *ir); void emit_swz(ir_expression *ir); @@ -1270,53 +1269,6 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan return true; } -bool -glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) -{ - /* Emit saturates in the vertex shader only if SM 3.0 is supported. -*/ - if (this-prog-Target == GL_VERTEX_PROGRAM_ARB - !st_context(this-ctx)-has_shader_model3) { - return false; - } - - ir_rvalue *sat_src = ir-as_rvalue_to_saturate(); - if (!sat_src) - return false; - - sat_src-accept(this); - st_src_reg src = this-result; - - /* If we generated an expression instruction into a temporary in -* processing the saturate's operand, apply the saturate to that -* instruction. Otherwise, generate a MOV to do the saturate. 
-* -* Note that we have to be careful to only do this optimization if -* the instruction in question was what generated src-result. For -* example, ir_dereference_array might generate a MUL instruction -* to create the reladdr, and return us a src reg using that -* reladdr. That MUL result is not the value we're trying to -* saturate. -*/ - ir_expression *sat_src_expr = sat_src-as_expression(); - if (sat_src_expr (sat_src_expr-operation == ir_binop_mul || - sat_src_expr-operation == ir_binop_add || - sat_src_expr-operation == ir_binop_dot)) { - glsl_to_tgsi_instruction *new_inst; - new_inst = (glsl_to_tgsi_instruction *)this-instructions.get_tail(); - new_inst-saturate = true; - } else { - this-result = get_temp(ir-type); - st_dst_reg result_dst =
Re: [Mesa-dev] [PATCH 10/19] auxiliary/os: introduce os_get_total_physical_memory helper function
On 14/08/2014 23:18, Emil Velikov wrote: Cc: Alexander von Gluck IV kallis...@unixzen.com Signed-off-by: Emil Velikov emil.l.veli...@gmail.com --- src/gallium/auxiliary/os/os_misc.c | 64 ++ src/gallium/auxiliary/os/os_misc.h | 7 + 2 files changed, 71 insertions(+) Since this #errors on unknown platforms, teach it about the existence of Cygwin. From 03e0df4455810e255c22a0532b9e66dcc3d60a1d Mon Sep 17 00:00:00 2001 From: Jon TURNEY jon.tur...@dronecode.org.uk Date: Sun, 17 Aug 2014 17:21:27 +0100 Subject: [PATCH] Teach os_get_physical_memory about Cygwin Signed-off-by: Jon TURNEY jon.tur...@dronecode.org.uk --- src/gallium/auxiliary/os/os_misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/os/os_misc.c b/src/gallium/auxiliary/os/os_misc.c index 3846a9a..ef84c79 100644 --- a/src/gallium/auxiliary/os/os_misc.c +++ b/src/gallium/auxiliary/os/os_misc.c @@ -47,7 +47,7 @@ #endif -#if defined(PIPE_OS_LINUX) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_CYGWIN) # include unistd.h #elif defined(PIPE_OS_APPLE) || defined(PIPE_OS_BSD) # include sys/sysctl.h @@ -111,7 +111,7 @@ os_get_option(const char *name) bool os_get_total_physical_memory(uint64_t *size) { -#if defined(PIPE_OS_LINUX) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_CYGWIN) const long phys_pages = sysconf(_SC_PHYS_PAGES); const long page_size = sysconf(_SC_PAGE_SIZE); -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 10/19] auxiliary/os: introduce os_get_total_physical_memory helper function
On 18/08/14 13:20, Jon TURNEY wrote: On 14/08/2014 23:18, Emil Velikov wrote: Cc: Alexander von Gluck IV kallis...@unixzen.com Signed-off-by: Emil Velikov emil.l.veli...@gmail.com --- src/gallium/auxiliary/os/os_misc.c | 64 ++ src/gallium/auxiliary/os/os_misc.h | 7 + 2 files changed, 71 insertions(+) Since this #errors on unknown platforms, teach it about the existence of Cygwin. 0001-Teach-os_get_physical_memory-about-Cygwin.patch From 03e0df4455810e255c22a0532b9e66dcc3d60a1d Mon Sep 17 00:00:00 2001 From: Jon TURNEY jon.tur...@dronecode.org.uk Date: Sun, 17 Aug 2014 17:21:27 +0100 Subject: [PATCH] Teach os_get_physical_memory about Cygwin Signed-off-by: Jon TURNEY jon.tur...@dronecode.org.uk I was under the strange impression that the p_config.h will set PIPE_OS_LINUX for Cygwin. It seem like I got confused with PIPE_OS_UNIX. Reviewed-by: Emil Velikov emil.l.veli...@gmail.com --- src/gallium/auxiliary/os/os_misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/os/os_misc.c b/src/gallium/auxiliary/os/os_misc.c index 3846a9a..ef84c79 100644 --- a/src/gallium/auxiliary/os/os_misc.c +++ b/src/gallium/auxiliary/os/os_misc.c @@ -47,7 +47,7 @@ #endif -#if defined(PIPE_OS_LINUX) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_CYGWIN) # include unistd.h #elif defined(PIPE_OS_APPLE) || defined(PIPE_OS_BSD) # include sys/sysctl.h @@ -111,7 +111,7 @@ os_get_option(const char *name) bool os_get_total_physical_memory(uint64_t *size) { -#if defined(PIPE_OS_LINUX) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_CYGWIN) const long phys_pages = sysconf(_SC_PHYS_PAGES); const long page_size = sysconf(_SC_PAGE_SIZE); -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] clover: ensure compat::string is \0 terminated
EdB edb+m...@sigluy.net writes: otherwise c_str() is not safe --- src/gallium/state_trackers/clover/util/compat.hpp | 54 --- 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/src/gallium/state_trackers/clover/util/compat.hpp b/src/gallium/state_trackers/clover/util/compat.hpp index 6f0f7cc..7ca1f85 100644 --- a/src/gallium/state_trackers/clover/util/compat.hpp +++ b/src/gallium/state_trackers/clover/util/compat.hpp @@ -197,7 +197,7 @@ namespace clover { return _p[i]; } - private: + protected: iterator _p; //memory array size_type _s; //size size_type _c; //capacity @@ -306,18 +306,56 @@ namespace clover { class string : public vectorchar { public: - string() : vector() { + string() : vector(0, 1) { +_p[_s - 1] = '\0'; } - string(const char *p) : vector(p, std::strlen(p)) { + string(const char *p) : vector(p, std::strlen(p) + 1) { +_p[_s - 1] = '\0'; } templatetypename C - string(const C v) : vector(v) { + string(const C v) : vector(*v.begin(), v.size() + 1) { +_p[_s - 1] = '\0'; } - operator std::string() const { -return std::string(begin(), end()); + void + reserve(size_type m) { +vector::reserve(m + 1); + } + + void + resize(size_type m, char x = '\0') { +vector::resize(m + 1, x); +_p[_s - 1] = '\0'; + } + + void + push_back(char x) { +reserve(_s + 1); +_p[_s - 1] = x; +_p[_s] = '\0'; +++_s; + } + + size_type + size() const { +return _s - 1; + } + + size_type + capacity() const { +return _c - 1; + } + + iterator + end() { +return _p + size(); + } + + const_iterator + end() const { +return _p + size(); } At this point where all methods from the base class need to be redefined it probably stops making sense to use inheritance instead of aggregation. Once we've done that fixing c_str() gets a lot easier (two lines of code) because we can just declare the container as mutable and fix up the NULL terminator when c_str() is called. Both changes attached. 
const char * @@ -325,6 +363,10 @@ namespace clover { return begin(); } + operator std::string() const { +return std::string(begin(), end()); + } + const char * find(const string s) const { for (size_t i = 0; i + s.size() size(); ++i) { -- 2.0.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev From e1e97e017f25f4ed1c75bae71095ffa116374654 Mon Sep 17 00:00:00 2001 From: Francisco Jerez curroje...@riseup.net Date: Mon, 18 Aug 2014 15:21:52 +0300 Subject: [PATCH 1/2] clover/util: Implement compat::string using aggregation instead of inheritance. --- src/gallium/state_trackers/clover/util/compat.hpp | 76 +-- 1 file changed, 71 insertions(+), 5 deletions(-) diff --git a/src/gallium/state_trackers/clover/util/compat.hpp b/src/gallium/state_trackers/clover/util/compat.hpp index a4e3938..e0ab965 100644 --- a/src/gallium/state_trackers/clover/util/compat.hpp +++ b/src/gallium/state_trackers/clover/util/compat.hpp @@ -280,20 +280,83 @@ namespace clover { size_t offset; }; - class string : public vectorchar { + class string { public: - string() : vector() { + typedef char *iterator; + typedef const char *const_iterator; + typedef char value_type; + typedef char reference; + typedef const char const_reference; + typedef std::ptrdiff_t difference_type; + typedef std::size_t size_type; + + string() : v() { } - string(const char *p) : vector(p, std::strlen(p)) { + string(const char *p) : v(p, std::strlen(p)) { } templatetypename C - string(const C v) : vector(v) { + string(const C v) : v(v) { } operator std::string() const { -return std::string(begin(), end()); +return std::string(v.begin(), v.end()); + } + + void + reserve(size_type n) { +v.reserve(n); + } + + void + resize(size_type n, char x = char()) { +v.resize(n, x); + } + + void + push_back(char x) { +v.push_back(x); + } + + size_type + size() const { +return v.size(); +
Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa
Am 16.08.2014 02:12, schrieb Connor Abbott: I know what you might be thinking right now. Wait, *another* IR? Don't we already have like 5 of those, not counting all the driver-specific ones? Isn't this stuff complicated enough already? Well, there are some pretty good reasons to start afresh (again...). In the years we've been using GLSL IR, we've come to realize that, in fact, it's not what we want *at all* to do optimizations on. Ian has done a talk at FOSDEM that highlights some of the problems they've run into: https://urldefense.proofpoint.com/v1/url?u=https://video.fosdem.org/2014/H1301_Cornil/Saturday/Three_Years_Experience_with_a_Treelike_Shader_IR.webmk=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0Ar=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0Am=iXhCeAYmidPDc1lFo757Cc9V0PvWAN4n3X%2Fw%2B%2F7Lx%2Fs%3D%0As=f103fb26bf53eee64318a490517d1ee9ab88ecd29fcdbe49d54b5a27e7581c2e But here's the summary: * GLSL IR is way too much of a memory hog, since it has to make a new variable for each temporary the compiler creates and then each time you want to dereference that temporary you need to create an ir_dereference_variable that points to it which is also very cache-unfriendly (downright cache-mean!). * The expression trees were originally added so that we could do pattern matching to automatically optimize things, but this turned out to be both very difficult to do and not very helpful. Instead, all it does is add more complexity to the IR without much benefit - with SSA or having proper use-def chains, we could get back what the trees give us while also being able to do lots more optimizations. * We don't have the concept of basic blocks in GLSL IR, which makes a lot of optimizations harder because they were originally designed with basic blocks in mind - take, for example, my SSA series. 
I had to map a whole lot of concepts that were based on the control flow graph to this tree of statements that GLSL IR uses, and the end result wound up looking nothing at all like the original paper. This problem gets even worse for things like e.g. Global Code Motion that depend upon having the dominance tree. I originally wanted to modify GLSL IR to fix these problems by adding new instruction types that would address these issues and then converting back and forth between the old and the new form, but I realized that fixing all the problems would basically mean a complete rewrite - and if that's the case, then why don't we start from scratch? So I took Ken's suggestions and started designing, and then at Intel over the summer started implementing, a completely new IR which I call NIR that's at a lower level than GLSL IR, but still high-level enough to be mostly device-independent (different drivers may have different passes and different ways of lowering e.g. matrix multiplies) so that we can do generic optimizations on it. Having support for SSA from the beginning was also a must, because lots of optimisations that we really want for cleaning up DX9-translated games are either a lot easier in or made possible by SSA. I also made the decision for it to be typeless, because that's what the cool kids are all doing :) and for a lower-level, flat IR it seemed like the thing to do (it could have gone either way, though). So the key design points of NIR (pronounced either like near as in NIR is near! 
or to rhyme with burr) are: * It's flat (no expression trees) * It's typeless * Modifiers (abs, negate, saturate), swizzles, and write masks are part of ALU instructions * It includes enough GLSL-like things (variables that you can load from or store to, function calls) to be hardware-agnostic (although we don't have a way to represent matrix multiplies right now, but that could easily be added) to be able to do optimizations at a high level, while having lowering passes that convert variables to registers and input/output/uniform loads/stores that will open up more opportunities for optimization and save memory while being more hardware-specific. * Control flow consists of a tree of if statements and loops, like in GLSL IR, except the leaves of the tree are now basic blocks instead of instructions. Also, each basic block keeps track of its successors and predecessors, so the control flow graph is explicit in the IR. * SSA is natively supported, and SSA uses point directly to the SSA definition, which means that the use-def chains are always there, and def-use chains are kept by tracking the set of all uses for each definition. * It's written in C. (see the README in patch 3 and nir.h in patch 4 for more details) Some things that are missing or could be improved: * There's currently no alias tracking for inputs, outputs, and uniforms. This is especially important for uniforms because we don't pack them like we pack inputs and outputs. * We need a way to represent matrix multiplies so that we can do matrix-flipping optimizations
Re: [Mesa-dev] [PATCH 1/9] glsl: Optimize min/max expression trees
On 08/14/2014 04:33 AM, Ian Romanick wrote: On 07/29/2014 02:36 AM, Petri Latvala wrote: Add an optimization pass that drops min/max expression operands that can be proven to not contribute to the final result. The algorithm is similar to alpha-beta pruning on a minmax search, from the field of AI. This optimization pass can optimize min/max expressions where operands are min/max expressions. Such code can appear in shaders by itself, or as the result of clamp() or AMD_shader_trinary_minmax functions. This optimization pass improves the generated code for piglit's AMD_shader_trinary_minmax tests as follows: total instructions in shared programs: 75 - 67 (-10.67%) instructions in affected programs: 60 - 52 (-13.33%) GAINED:0 LOST: 0 All tests (max3, min3, mid3) improved. And I assume no piglit regressions? Indeed no regressions, or new successes. I wrote that in the cover letter, I should have written it also in this patch's commit message... Also... have you tried this in combination with Abdiel's related work on saturates? Tested the combination now, after some fighting with shader-db. The results are the same, except : One shader from Dungeon Defenders is hurt by shader-db metrics (26 - 28), because of dropping of a (constant float (0.0)) operand, which was compiled to a saturate modifier. This shader compiled into the same code with or without my patches. Talked with Abdiel about the combination, recapping here: Our changes are orthogonal and not conflicting, so we can both proceed at our own paces. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76861 Signed-off-by: Petri Latvala petri.latv...@intel.com --- src/glsl/Makefile.sources | 1 + src/glsl/glsl_parser_extras.cpp | 1 + src/glsl/ir_optimization.h | 1 + src/glsl/opt_minmax.cpp | 395 4 files changed, 398 insertions(+) create mode 100644 src/glsl/opt_minmax.cpp diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index b54eae7..1ee80a3 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -95,6 +95,7 @@ LIBGLSL_FILES = \ $(GLSL_SRCDIR)/opt_flip_matrices.cpp \ $(GLSL_SRCDIR)/opt_function_inlining.cpp \ $(GLSL_SRCDIR)/opt_if_simplification.cpp \ + $(GLSL_SRCDIR)/opt_minmax.cpp \ $(GLSL_SRCDIR)/opt_noop_swizzle.cpp \ $(GLSL_SRCDIR)/opt_rebalance_tree.cpp \ $(GLSL_SRCDIR)/opt_redundant_jumps.cpp \ diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 890123a..9f57ef3 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -1561,6 +1561,7 @@ do_common_optimization(exec_list *ir, bool linked, else progress = do_constant_variable_unlinked(ir) || progress; progress = do_constant_folding(ir) || progress; + progress = do_minmax_prune(ir) || progress; progress = do_cse(ir) || progress; progress = do_rebalance_tree(ir) || progress; progress = do_algebraic(ir, native_integers, options) || progress; diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index b83c225..9d22585 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -98,6 +98,7 @@ bool opt_flatten_nested_if_blocks(exec_list *instructions); bool do_discard_simplification(exec_list *instructions); bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0); bool do_mat_op_to_vec(exec_list *instructions); +bool do_minmax_prune(exec_list *instructions); bool do_noop_swizzle(exec_list *instructions); bool do_structure_splitting(exec_list *instructions); bool do_swizzle_swizzle(exec_list *instructions); diff 
--git a/src/glsl/opt_minmax.cpp b/src/glsl/opt_minmax.cpp new file mode 100644 index 000..5656059 --- /dev/null +++ b/src/glsl/opt_minmax.cpp @@ -0,0 +1,395 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF
Re: [Mesa-dev] [PATCH] squash! glsl: Optimize min/max expression trees
On 08/14/2014 07:04 AM, Matt Turner wrote: --- I'd squash this in at minimum. The changes are - Whitespace - Removal of unnecessary destructor - Renaming one and two to a and b (one-value.u[c0] two-value.u[c0]...) - continue - break - assert(!...) - unreachable - Not doing assignments in if conditionals - Marking swizzle_if_required as static Thanks, I'll squash this in. I also think less_all_components should just return an enum like { MIXED, EQUAL, LESS, GREATER }, rather than setting a variable in the class. It, as well as smaller/larger_constant, can then be static functions outside of the visitor. Yes, I'll try what it looks like with that. I think the algorithm itself looks correct. src/glsl/opt_minmax.cpp | 145 +--- 1 file changed, 63 insertions(+), 82 deletions(-) diff --git a/src/glsl/opt_minmax.cpp b/src/glsl/opt_minmax.cpp index 5656059..b987386 100644 --- a/src/glsl/opt_minmax.cpp +++ b/src/glsl/opt_minmax.cpp @@ -37,12 +37,10 @@ #include glsl_types.h #include main/macros.h -namespace -{ -class minmax_range -{ -public: +namespace { +class minmax_range { +public: minmax_range(ir_constant *low = NULL, ir_constant *high = NULL) { range[0] = low; @@ -60,60 +58,45 @@ public: class ir_minmax_visitor : public ir_rvalue_enter_visitor { public: ir_minmax_visitor() - : progress(false) - , valid(true) - { - } - - virtual ~ir_minmax_visitor() + : progress(false), valid(true) { } - bool - less_all_components(ir_constant *one, ir_constant *two); - - ir_constant * - smaller_constant(ir_constant *one, ir_constant *two); - - ir_constant * - larger_constant(ir_constant *one, ir_constant *two); + bool less_all_components(ir_constant *a, ir_constant *b); + ir_constant *smaller_constant(ir_constant *a, ir_constant *b); + ir_constant *larger_constant(ir_constant *a, ir_constant *b); - minmax_range - combine_range(minmax_range r0, minmax_range r1, bool ismin); + minmax_range combine_range(minmax_range r0, minmax_range r1, bool ismin); - minmax_range - 
range_intersection(minmax_range r0, minmax_range r1); + minmax_range range_intersection(minmax_range r0, minmax_range r1); - minmax_range - get_range(ir_rvalue *rval); + minmax_range get_range(ir_rvalue *rval); - ir_rvalue * - prune_expression(ir_expression *expr, minmax_range baserange); + ir_rvalue *prune_expression(ir_expression *expr, minmax_range baserange); - void - handle_rvalue(ir_rvalue **rvalue); + void handle_rvalue(ir_rvalue **rvalue); bool progress; bool valid; }; /* - * Returns true if all vector components of `one' are less than of `two'. + * Returns true if all vector components of `a' are less than of `b'. * * If there are vector components that are less while others are greater, the * visitor is marked invalid and no further changes will be made to the IR. */ bool -ir_minmax_visitor::less_all_components(ir_constant *one, ir_constant *two) +ir_minmax_visitor::less_all_components(ir_constant *a, ir_constant *b) { - assert(one != NULL); - assert(two != NULL); + assert(a != NULL); + assert(b != NULL); - assert(one-type-base_type == two-type-base_type); + assert(a-type-base_type == b-type-base_type); - unsigned oneinc = one-type-is_scalar() ? 0 : 1; - unsigned twoinc = two-type-is_scalar() ? 0 : 1; - unsigned components = MAX2(one-type-components(), two-type-components()); + unsigned a_inc = a-type-is_scalar() ? 0 : 1; + unsigned b_inc = b-type-is_scalar() ? 0 : 1; + unsigned components = MAX2(a-type-components(), b-type-components()); /* No early escape. 
We need to go through all components and mark the * visitor as invalid if comparison yields less for some components and @@ -127,34 +110,34 @@ ir_minmax_visitor::less_all_components(ir_constant *one, ir_constant *two) for (unsigned i = 0, c0 = 0, c1 = 0; i components; -c0 += oneinc, c1 += twoinc, ++i) { - switch (one-type-base_type) { +c0 += a_inc, c1 += b_inc, ++i) { + switch (a-type-base_type) { case GLSL_TYPE_UINT: - if (one-value.u[c0] two-value.u[c1]) + if (a-value.u[c0] b-value.u[c1]) foundless = true; - else if (one-value.u[c0] two-value.u[c1]) + else if (a-value.u[c0] b-value.u[c1]) foundgreater = true; else foundequal = true; - continue; + break; case GLSL_TYPE_INT: - if (one-value.i[c0] two-value.i[c1]) + if (a-value.i[c0] b-value.i[c1]) foundless = true; - else if (one-value.i[c0] two-value.i[c1]) + else if (a-value.i[c0] b-value.i[c1]) foundgreater = true; else foundequal = true; - continue; + break; case
[Mesa-dev] [Bug 82538] Super Maryo Chronicles fails with st/mesa assertion failure
https://bugs.freedesktop.org/show_bug.cgi?id=82538 --- Comment #4 from smoki smoki00...@gmail.com --- (In reply to comment #2) (In reply to comment #1) I can still reproduce it with current Mesa Git. Does your Mesa build have assertions enabled? Ah, sorry, I did not have assertions enabled at that time, so yes, the bug is there. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa
On Mon, Aug 18, 2014 at 2:44 PM, Roland Scheidegger srol...@vmware.com wrote: Am 16.08.2014 02:12, schrieb Connor Abbott: I know what you might be thinking right now. Wait, *another* IR? Don't we already have like 5 of those, not counting all the driver-specific ones? Isn't this stuff complicated enough already? Well, there are some pretty good reasons to start afresh (again...). In the years we've been using GLSL IR, we've come to realize that, in fact, it's not what we want *at all* to do optimizations on. Ian has done a talk at FOSDEM that highlights some of the problems they've run into: https://urldefense.proofpoint.com/v1/url?u=https://video.fosdem.org/2014/H1301_Cornil/Saturday/Three_Years_Experience_with_a_Treelike_Shader_IR.webmk=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0Ar=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0Am=iXhCeAYmidPDc1lFo757Cc9V0PvWAN4n3X%2Fw%2B%2F7Lx%2Fs%3D%0As=f103fb26bf53eee64318a490517d1ee9ab88ecd29fcdbe49d54b5a27e7581c2e But here's the summary: * GLSL IR is way too much of a memory hog, since it has to make a new variable for each temporary the compiler creates and then each time you want to dereference that temporary you need to create an ir_dereference_variable that points to it which is also very cache-unfriendly (downright cache-mean!). * The expression trees were originally added so that we could do pattern matching to automatically optimize things, but this turned out to be both very difficult to do and not very helpful. Instead, all it does is add more complexity to the IR without much benefit - with SSA or having proper use-def chains, we could get back what the trees give us while also being able to do lots more optimizations. * We don't have the concept of basic blocks in GLSL IR, which makes a lot of optimizations harder because they were originally designed with basic blocks in mind - take, for example, my SSA series. 
I had to map a whole lot of concepts that were based on the control flow graph to this tree of statements that GLSL IR uses, and the end result wound up looking nothing at all like the original paper. This problem gets even worse for things like e.g. Global Code Motion that depend upon having the dominance tree. I originally wanted to modify GLSL IR to fix these problems by adding new instruction types that would address these issues and then converting back and forth between the old and the new form, but I realized that fixing all the problems would basically mean a complete rewrite - and if that's the case, then why don't we start from scratch? So I took Ken's suggestions and started designing, and then at Intel over the summer started implementing, a completely new IR which I call NIR that's at a lower level than GLSL IR, but still high-level enough to be mostly device-independent (different drivers may have different passes and different ways of lowering e.g. matrix multiplies) so that we can do generic optimizations on it. Having support for SSA from the beginning was also a must, because lots of optimisations that we really want for cleaning up DX9-translated games are either a lot easier in or made possible by SSA. I also made the decision for it to be typeless, because that's what the cool kids are all doing :) and for a lower-level, flat IR it seemed like the thing to do (it could have gone either way, though). So the key design points of NIR (pronounced either like near as in NIR is near! 
or to rhyme with burr) are: * It's flat (no expression trees) * It's typeless * Modifiers (abs, negate, saturate), swizzles, and write masks are part of ALU instructions * It includes enough GLSL-like things (variables that you can load from or store to, function calls) to be hardware-agnostic (although we don't have a way to represent matrix multiplies right now, but that could easily be added) to be able to do optimizations at a high level, while having lowering passes that convert variables to registers and input/output/uniform loads/stores that will open up more opportunities for optimization and save memory while being more hardware-specific. * Control flow consists of a tree of if statements and loops, like in GLSL IR, except the leaves of the tree are now basic blocks instead of instructions. Also, each basic block keeps track of its successors and predecessors, so the control flow graph is explicit in the IR. * SSA is natively supported, and SSA uses point directly to the SSA definition, which means that the use-def chains are always there, and def-use chains are kept by tracking the set of all uses for each definition. * It's written in C. (see the README in patch 3 and nir.h in patch 4 for more details) Some things that are missing or could be improved: * There's currently no alias tracking for inputs, outputs, and uniforms. This is especially important for uniforms because we don't pack them like we pack inputs and outputs. * We need a way to represent
Re: [Mesa-dev] [PATCH] squash! glsl: Optimize min/max expression trees
On 08/14/2014 11:00 AM, Connor Abbott wrote: Another thing I'd like to see is to change minmax_range to call things low and high instead of range[0] and range[1]. This helps readability, and the tricks with indirect addressing that having an array lets you do are things we really shouldn't be doing anyways because it's hard to follow. Sure, changing. As I mentioned before, swizzle_if_required() should probably use the ir_builder swizzle helpers. I copied swizzle_if_required from opt_algebraic. I'll squeeze in a patch that changes that as well. Or actually just refactor the function to live somewhere where it's reusable. I'm still not convinced that the algorithm is the best way to go about it. Right now, AFAICT, we do something like: - Pass in a base range, which is what the min's and max's above us in the tree will clamp the value we return to - Get the ranges for each subexpression (this is a recursive call) - Check and see if each operand is unnecessary (i.e. its range is strictly greater than the base range or strictly greater than the other argument for mins, the other way around for max's) As another thing, the logic for this part could be made a *lot* clearer by rearranging the code and commenting. I'd do something like: bool is_redundant = false /* whether this operand will never affect the final value of the min-max tree */ if (is_min) { /* if this operand will always be greater than the other one, it's redundant */ if (limit[i].low limit[1 - i].high) is_redundant = true; /* if this operand is always greater than baserange, then even if it's smaller than the other one it'll get clamped so it's redundant */ if (limit[i].low baserange.high) is_redundant = true; } else { ... the exact same logic mirrored ... } - Recurse into the subexpressions, computing the new baserange. 
What I think we should do instead is change prune_expression() to also return the range for the expression (it's now returning two things, so one would have to be passed via a class variable), so it would look like: - Pass in the base range - If this is a constant, return ourself and the range with low == high - Recurse into both subexpressions, setting both the range (limits[i]) and the new subexpression - If one of the subexpressions is redundant, return the other subexpression and its range - Otherwise, return ourself and the combination of the ranges This will allow us to do the recursion only once, instead of once in get_range() and once in prune_expression(), which will make things simpler and faster. You mean have only prune_expression(), cut out get_range()? I tried hard to have this recurse only once and it looks impossible to me. Consider this (hopefully this ascii art gets through fine): max / \ max max / \ / \ 3a b2 (If ascii art failed, it's max(max(3, a), max(b, 2)) ) a and b are variables, 2 and 3 constants. 2 is to be dropped from the right subtree of the top max, but for that we need the 3 from the left subtree. prune_expression() on the left subtree will get us the 3 as the limit, which correctly drops the 2 when recursed to the right subtree. What about if 3 and 2 are swapped in the tree? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/9] glsl: Refactor the python test case generator
On 08/13/2014 01:51 AM, Dylan Baker wrote: On Tuesday, July 29, 2014 12:36:33 PM Petri Latvala wrote: Move the IR sexp builder helpers and test script creation parts of tests/lower_jumps/create_test_cases.py into tests/test_case_generator.py No functional changes. Signed-off-by: Petri Latvala petri.latv...@intel.com --- src/glsl/tests/lower_jumps/create_test_cases.py | 336 +++- src/glsl/tests/test_case_generator.py | 293 + 2 files changed, 334 insertions(+), 295 deletions(-) create mode 100644 src/glsl/tests/test_case_generator.py diff --git a/src/glsl/tests/lower_jumps/create_test_cases.py b/src/glsl/tests/lower_jumps/create_test_cases.py index 3be1079..9783627 100644 --- a/src/glsl/tests/lower_jumps/create_test_cases.py +++ b/src/glsl/tests/lower_jumps/create_test_cases.py @@ -27,278 +27,9 @@ import re import subprocess import sys -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access to sexps.py, which is in parent dir +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access to sexps.py and test_case_generator.py, which are in parent dir from sexps import * - -def make_test_case(f_name, ret_type, body): -Create a simple optimization test case consisting of a single -function with the given name, return type, and body. - -Global declarations are automatically created for any undeclared -variables that are referenced by the function. All undeclared -variables are assumed to be floats. 
- -check_sexp(body) -declarations = {} -def make_declarations(sexp, already_declared = ()): -if isinstance(sexp, list): -if len(sexp) == 2 and sexp[0] == 'var_ref': -if sexp[1] not in already_declared: -declarations[sexp[1]] = [ -'declare', ['in'], 'float', sexp[1]] -elif len(sexp) == 4 and sexp[0] == 'assign': -assert sexp[2][0] == 'var_ref' -if sexp[2][1] not in already_declared: -declarations[sexp[2][1]] = [ -'declare', ['out'], 'float', sexp[2][1]] -make_declarations(sexp[3], already_declared) -else: -already_declared = set(already_declared) -for s in sexp: -if isinstance(s, list) and len(s) = 4 and \ -s[0] == 'declare': -already_declared.add(s[3]) -else: -make_declarations(s, already_declared) -make_declarations(body) -return declarations.values() + \ -[['function', f_name, ['signature', ret_type, ['parameters'], body]]] - - -# The following functions can be used to build expressions. - -def const_float(value): -Create an expression representing the given floating point value. -return ['constant', 'float', ['{0:.6f}'.format(value)]] - -def const_bool(value): -Create an expression representing the given boolean value. - -If value is not a boolean, it is converted to a boolean. So, for -instance, const_bool(1) is equivalent to const_bool(True). - -return ['constant', 'bool', ['{0}'.format(1 if value else 0)]] - -def gt_zero(var_name): -Create Construct the expression var_name 0 -return ['expression', 'bool', '', ['var_ref', var_name], const_float(0)] - - -# The following functions can be used to build complex control flow -# statements. All of these functions return statement lists (even -# those which only create a single statement), so that statements can -# be sequenced together using the '+' operator. - -def return_(value = None): -Create a return statement. -if value is not None: -return [['return', value]] -else: -return [['return']] - -def break_(): -Create a break statement. -return ['break'] - -def continue_(): -Create a continue statement. 
-return ['continue'] - -def simple_if(var_name, then_statements, else_statements = None): -Create a statement of the form - -if (var_name 0.0) { - then_statements -} else { - else_statements -} - -else_statements may be omitted. - -if else_statements is None: -else_statements = [] -check_sexp(then_statements) -check_sexp(else_statements) -return [['if', gt_zero(var_name), then_statements, else_statements]] - -def loop(statements): -Create a loop containing the given statements as its loop -body. - -check_sexp(statements) -return [['loop', statements]] - -def declare_temp(var_type, var_name): -Create a declaration of the form - -(declare (temporary) var_type var_name) - -return [['declare', ['temporary'], var_type, var_name]] - -def assign_x(var_name, value): -Create a statement that assigns value to the variable -var_name. The assignment uses the mask (x). - -
Re: [Mesa-dev] [PATCH 9/9] glsl: Add tests for minmax prune
On 08/13/2014 01:59 AM, Dylan Baker wrote: On Tuesday, July 29, 2014 12:36:39 PM Petri Latvala wrote: tests/minmax/create_test_cases.py generates the following tests: multiple_min*.opt_test: Construct a tree of min expressions for all permutations of a var_ref and three constants. They should all optimize to a single min with the variable and the smallest constant. multiple_max*.opt_test: Same as above, for max. mid3opt*.opt_test: Test that code generated from a mid3() for two constants and a var_ref optimizes to a single max and a single min. mixed_vectors*.opt_test: Test that the optimization pass doesn't modify expression trees with constant vectors where some components compare as less, some as greater. Signed-off-by: Petri Latvala petri.latv...@intel.com --- src/glsl/tests/minmax/.gitignore | 3 + src/glsl/tests/minmax/create_test_cases.py | 151 + 2 files changed, 154 insertions(+) create mode 100644 src/glsl/tests/minmax/.gitignore create mode 100644 src/glsl/tests/minmax/create_test_cases.py diff --git a/src/glsl/tests/minmax/.gitignore b/src/glsl/tests/minmax/.gitignore new file mode 100644 index 000..e98df62 --- /dev/null +++ b/src/glsl/tests/minmax/.gitignore @@ -0,0 +1,3 @@ +*.opt_test +*.expected +*.out diff --git a/src/glsl/tests/minmax/create_test_cases.py b/src/glsl/tests/minmax/create_test_cases.py new file mode 100644 index 000..4f78980 --- /dev/null +++ b/src/glsl/tests/minmax/create_test_cases.py @@ -0,0 +1,151 @@ +# coding=utf-8 +# +# Copyright © 2014 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the Software), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice 
and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +import os +import os.path +import re +import subprocess +import sys +import itertools This comment applies to all the patches. You're importing a bunch of modules you're not using, you should remove any that are not used. In this file os.path, re, and subprocess are not used. Oh, yes, leftovers from the refactoring. Fix inc. + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) +from sexps import * +from test_case_generator import * + +def test_multiple_max(): +doc_string = Test that multiple constants in multiple max expressions are reduced to a single max. What is this? If it's a docstring it's not assigned, it's just a triple quoted string at the start of the function or class. Fix this for the other functions as well I followed the convention from the jump lowering tests. It's the string printed to the generated test script. I'll fix the single-line strings to normal quotes. 
+ +operands = [const_float(1), +const_float(2), +const_float(3), +['var_ref', 'a']] + +c = 1 +for ops in itertools.permutations(operands): +maxtree1 = reduce(lambda a, b: max_(a, b, 'float'), ops) +maxtree2 = reduce(lambda a, b: max_(b, a, 'float'), ops) + +expected = max_(const_float(3), ['var_ref', 'a'], 'float') + +input_sexp = make_test_case('main', 'void', ( +assign_x('b', maxtree1) + +assign_x('c', maxtree2) +)) +expected_sexp = make_test_case('main', 'void', ( +assign_x('b', expected) + +assign_x('c', expected) +)) + +create_test_case(doc_string, input_sexp, expected_sexp, 'multiple_max{0}'.format(c), 'do_minmax_prune') +c += 1 + +def test_multiple_min(): +doc_string = Test that multiple constants in multiple min expressions are reduced to a single min. + +operands = [const_float(1), +const_float(2), +const_float(3), +['var_ref', 'a']] + +c = 1 +for ops in itertools.permutations(operands): +mintree1 = reduce(lambda a, b: min_(a, b, 'float'), ops) +mintree2 = reduce(lambda a, b: min_(b, a, 'float'), ops) + +expected = min_(const_float(1),
Re: [Mesa-dev] [PATCH 1/2] c11/threads: add missing brackets around _MTX_INITIALIZER_NP
On 04/08/14 18:24, Emil Velikov wrote: On 02/08/14 02:51, Emil Velikov wrote: On 02/08/14 00:26, Ian Romanick wrote: On 08/01/2014 09:41 AM, Emil Velikov wrote: ... for win32 builds. Spotted this warning when I've imported the library into waffle, and gave mingw-w64-gcc a bash at compiling it. src/waffle/core/wcore_display.c:37:5: warning: missing braces around initializer [-Wmissing-braces] static mtx_t mutex = _MTX_INITIALIZER_NP; ^ Signed-off-by: Emil Velikov emil.l.veli...@gmail.com --- include/c11/threads_win32.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/c11/threads_win32.h b/include/c11/threads_win32.h index 5298a84..35880ba 100644 --- a/include/c11/threads_win32.h +++ b/include/c11/threads_win32.h @@ -85,7 +85,7 @@ Configuration macro: #define TSS_DTOR_ITERATIONS 1 // FIXME: temporary non-standard hack to ease transition -#define _MTX_INITIALIZER_NP {(PCRITICAL_SECTION_DEBUG)-1, -1, 0, 0, 0, 0} +#define _MTX_INITIALIZER_NP {{(PCRITICAL_SECTION_DEBUG)-1, -1, 0, 0, 0, 0}} This is because CRITICAL_SECTION is actually a typedef of a pointer to some structure type, so it needs to be initialized like an array of structures. Yeah? I wish I could agree. CRITICAL_SECTION is a typedef of a typedef'ed struct (no pointers yet) where the first member is a struct *, as seen below. typedef B CRITICAL_SECTION; typedef struct A { struct *bla; ... } B; I remember spending a few hours reading and experimenting with this and every way I looked at it current code seems sane. In the end I've smashed the brackets not to pollute the build log and carried on with other stuff :) Don't think I have checked if MSVC complained about the issue though. Will give it a try next time I reboot. I searched a bit on the net, and I could not find a single example of initializing a win32 CRITICAL_SECTION this way. Is this a good idea? The FIXME comment doesn't inspire confidence... Same here.
AFAICS one should init the mutex via InitializeCriticalSection or InitializeCriticalSectionAndSpinCount. Either of which is very Win32 specific and not at all portable. Perhaps Jose (the author) can share some more insights on the topic ? Fun stuff. MSVC produces _no_ warnings with or without this patch. Not sure what exactly is happening here, perhaps I'm hitting some obscure mingw-w64 (gcc?) bug ? Yes, it's weird. {(PCRITICAL_SECTION_DEBUG)-1, -1, 0, 0, 0, 0} always worked well for me, both MSVC and Mingw, without warnings. Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] c11/threads: add missing brackets around _MTX_INITIALIZER_NP
On 02/08/14 00:26, Ian Romanick wrote: On 08/01/2014 09:41 AM, Emil Velikov wrote: ... for win32 builds. Spotted this warning when I've imported the library into waffle, and gave mingw-w64-gcc a bash at compiling it. src/waffle/core/wcore_display.c:37:5: warning: missing braces around initializer [-Wmissing-braces] static mtx_t mutex = _MTX_INITIALIZER_NP; ^ Signed-off-by: Emil Velikov emil.l.veli...@gmail.com --- include/c11/threads_win32.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/c11/threads_win32.h b/include/c11/threads_win32.h index 5298a84..35880ba 100644 --- a/include/c11/threads_win32.h +++ b/include/c11/threads_win32.h @@ -85,7 +85,7 @@ Configuration macro: #define TSS_DTOR_ITERATIONS 1 // FIXME: temporary non-standard hack to ease transition -#define _MTX_INITIALIZER_NP {(PCRITICAL_SECTION_DEBUG)-1, -1, 0, 0, 0, 0} +#define _MTX_INITIALIZER_NP {{(PCRITICAL_SECTION_DEBUG)-1, -1, 0, 0, 0, 0}} This is because CRITICAL_SECTION is actually a typedef of a pointer to some structure type, so it needs to be initialized like an array of structures. Yeah? I searched a bit on the net, and I could not find a single example of initializing a win32 CRITICAL_SECTION this way. Is this a good idea? It's unavoidable. src/gallium/auxiliary/os/os_thread.h used to have a comment with a link to this: http://locklessinc.com/articles/pthreads_on_windows/ But it got lost with the c11/threads.h introduction. It's probably worth adding this link once again. The FIXME comment doesn't inspire confidence... That comment is because static initializers are not part of the C standard. This works fine from a Win32 POV. In other words, we should stop using static initializers, not because they cause problems on Windows, but because it's not part of the C11 standard, hence we'll be in trouble the day we want to use system provided C11 headers instead of our own.
include/c11/threads_posix.h has the same: // FIXME: temporary non-standard hack to ease transition #define _MTX_INITIALIZER_NP PTHREAD_MUTEX_INITIALIZER Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] c11/threads: correct assertion
On 02/08/14 00:16, Ian Romanick wrote: On 08/01/2014 09:41 AM, Emil Velikov wrote: We should assert when either the function or the flag pointer is null or we'll end up with a null reference a few lines later. Currently unused by mesa thus it has gone unnoticed. Signed-off-by: Emil Velikov emil.l.veli...@gmail.com --- include/c11/threads_win32.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/c11/threads_win32.h b/include/c11/threads_win32.h index 35880ba..a95cb78 100644 --- a/include/c11/threads_win32.h +++ b/include/c11/threads_win32.h @@ -296,7 +296,7 @@ static void impl_tss_dtor_invoke() static inline void call_once(once_flag *flag, void (*func)(void)) { -assert(!flag && !func); +assert(flag && func); This is why I generally prefer to compare pointers to NULL instead of just using them as booleans. assert(flag == NULL && func == NULL); is much more obviously wrong than the current code. Either way, this patch is Reviewed-by: Ian Romanick ian.d.roman...@intel.com Yep. My bad. I added this assertion to see if I could silence MSVC static code analyzer's warnings. It was untested. And it didn't silence the warnings (probably because of this) so I shouldn't have committed it at all. Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 18/20] i965/fs: Preserve CFG in the SEL peephole.
On Thu, Jul 24, 2014 at 07:54:25PM -0700, Matt Turner wrote: --- src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp index d64cd98..f609138 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp @@ -212,23 +212,26 @@ fs_visitor::opt_peephole_sel() if (brw->gen == 6 && if_inst->conditional_mod) { fs_inst *cmp_inst = CMP(reg_null_d, if_inst->src[0], if_inst->src[1], if_inst->conditional_mod); - if_inst->insert_before(cmp_inst); + if_inst->insert_before(block, cmp_inst); } + bblock_t *then_block = (bblock_t *)block->link.next; + bblock_t *else_block = (bblock_t *)block->else_block->link.next; Isn't this a pointer to the endif-block? I thought else-block would be bblock_t *else_block = (bblock_t *)block->then_block->link.next; or simply just bblock_t *else_block = (bblock_t *)block->else_block; + for (int i = 0; i < movs; i++) { if (mov_imm_inst[i]) -if_inst->insert_before(mov_imm_inst[i]); - if_inst->insert_before(sel_inst[i]); +if_inst->insert_before(block, mov_imm_inst[i]); + if_inst->insert_before(block, sel_inst[i]); - then_mov[i]->remove(); - else_mov[i]->remove(); + then_mov[i]->remove(then_block); + else_mov[i]->remove(else_block); } progress = true; } if (progress) - invalidate_live_intervals(); + invalidate_live_intervals(false); return progress; } -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] squash! glsl: Optimize min/max expression trees
On Mon, Aug 18, 2014 at 9:26 AM, Petri Latvala petri.latv...@intel.com wrote: On 08/14/2014 11:00 AM, Connor Abbott wrote: Another thing I'd like to see is to change minmax_range to call things low and high instead of range[0] and range[1]. This helps readability, and the tricks with indirect addressing that having an array lets you do are things we really shouldn't be doing anyways because it's hard to follow. Sure, changing. As I mentioned before, swizzle_if_required() should probably use the ir_builder swizzle helpers. I copied swizzle_if_required from opt_algebraic. I'll squeeze in a patch that changes that as well. Or actually just refactor the function to live somewhere where it's reusable. I'm still not convinced that the algorithm is the best way to go about it. Right now, AFAICT, we do something like: - Pass in a base range, which is what the min's and max's above us in the tree will clamp the value we return to - Get the ranges for each subexpression (this is a recursive call) - Check and see if each operand is unnecessary (i.e. its range is strictly greater than the base range or strictly greater than the other argument for mins, the other way around for max's) As another thing, the logic for this part could be made a *lot* clearer by rearranging the code and commenting. I'd do something like: bool is_redundant = false /* whether this operand will never affect the final value of the min-max tree */ if (is_min) { /* if this operand will always be greater than the other one, it's redundant */ if (limit[i].low limit[1 - i].high) is_redundant = true; /* if this operand is always greater than baserange, then even if it's smaller than the other one it'll get clamped so it's redundant */ if (limit[i].low baserange.high) is_redundant = true; } else { ... the exact same logic mirrored ... } - Recurse into the subexpressions, computing the new baserange. 
What I think we should do instead is change prune_expression() to also return the range for the expression (it's now returning two things, so one would have to be passed via a class variable), so it would look like: - Pass in the base range - If this is a constant, return ourself and the range with low == high - Recurse into both subexpressions, setting both the range (limits[i]) and the new subexpression - If one of the subexpressions is redundant, return the other subexpression and its range - Otherwise, return ourself and the combination of the ranges This will allow us to do the recursion only once, instead of once in get_range() and once in prune_expression(), which will make things simpler and faster. You mean have only prune_expression(), cut out get_range()? I tried hard to have this recurse only once and it looks impossible to me. Consider this (hopefully this ascii art gets through fine): max / \ max max / \ / \ 3a b2 (If ascii art failed, it'smax(max(3, a), max(b, 2)) ) a and b are variables, 2 and 3 constants. 2 is to be dropped from the right subtree of the top max, but for that we need the 3 from the left subtree. prune_expression() on the left subtree will get us the 3 as the limit, which correctly drops the 2 when recursed to the right subtree. What about if 3 and 2 are swapped in the tree? Ah, I see. Can you add a comment somewhere (perhaps before the call to get_range()) that explains this all so some dummy like me doesn't later ask why we recurse twice? Something like: Recurse to get the ranges for each of the subtrees of this expression. We need to do this as a separate step because we need to know the ranges of each of the subtrees before we prune either one. Consider something like this: (your ASCII art) We would like to prune away the max on the bottom-right, but to do so we need to know the range of the expression on the left beforehand, and there's no guarantee that we will visit either subtree in a particular order. 
___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa
On 18/08/14 14:21, Marek Olšák wrote: On Mon, Aug 18, 2014 at 2:44 PM, Roland Scheidegger srol...@vmware.com wrote: Am 16.08.2014 02:12, schrieb Connor Abbott: I know what you might be thinking right now. Wait, *another* IR? Don't we already have like 5 of those, not counting all the driver-specific ones? Isn't this stuff complicated enough already? Well, there are some pretty good reasons to start afresh (again...). In the years we've been using GLSL IR, we've come to realize that, in fact, it's not what we want *at all* to do optimizations on. Ian has done a talk at FOSDEM that highlights some of the problems they've run into: https://urldefense.proofpoint.com/v1/url?u=https://video.fosdem.org/2014/H1301_Cornil/Saturday/Three_Years_Experience_with_a_Treelike_Shader_IR.webmk=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0Ar=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0Am=iXhCeAYmidPDc1lFo757Cc9V0PvWAN4n3X%2Fw%2B%2F7Lx%2Fs%3D%0As=f103fb26bf53eee64318a490517d1ee9ab88ecd29fcdbe49d54b5a27e7581c2e But here's the summary: * GLSL IR is way too much of a memory hog, since it has to make a new variable for each temporary the compiler creates and then each time you want to dereference that temporary you need to create an ir_dereference_variable that points to it which is also very cache-unfriendly (downright cache-mean!). * The expression trees were originally added so that we could do pattern matching to automatically optimize things, but this turned out to be both very difficult to do and not very helpful. Instead, all it does is add more complexity to the IR without much benefit - with SSA or having proper use-def chains, we could get back what the trees give us while also being able to do lots more optimizations. * We don't have the concept of basic blocks in GLSL IR, which makes a lot of optimizations harder because they were originally designed with basic blocks in mind - take, for example, my SSA series. 
I had to map a whole lot of concepts that were based on the control flow graph to this tree of statements that GLSL IR uses, and the end result wound up looking nothing at all like the original paper. This problem gets even worse for things like e.g. Global Code Motion that depend upon having the dominance tree. I originally wanted to modify GLSL IR to fix these problems by adding new instruction types that would address these issues and then converting back and forth between the old and the new form, but I realized that fixing all the problems would basically mean a complete rewrite - and if that's the case, then why don't we start from scratch? So I took Ken's suggestions and started designing, and then at Intel over the summer started implementing, a completely new IR which I call NIR that's at a lower level than GLSL IR, but still high-level enough to be mostly device-independant (different drivers may have different passes and different ways of lowering e.g. matrix multiplies) so that we can do generic optimizations on it. Having support for SSA from the beginning was also a must, because lots of optimisations that we really want for cleaning up DX9-translated games are either a lot easier in or made possible by SSA. I also made the decision for it to be typeless, because that's what the cool kids are all doing :) and for a lower-level, flat IR it seemed like the thing to do (it could have gone either way, though). So the key design points of NIR (pronounced either like near as in NIR is near! 
or to rhyme with burr) are: * It's flat (no expression trees) * It's typeless * Modifiers (abs, negate, saturate), swizzles, and write masks are part of ALU instructions * It includes enough GLSL-like things (variables that you can load from or store to, function calls) to be hardware-agnostic (although we don't have a way to represent matrix multiplies right now, but that could easily be added) to be able to do optimizations at a high level, while having lowering passes that convert variables to registers and input/output/uniform loads/stores that will open up more opportunities for optimization and save memory while being more hardware-specific. * Control flow consists of a tree of if statements and loops, like in GLSL IR, except the leaves of the tree are now basic blocks instead of instructions. Also, each basic block keeps track of its successors and predecessors, so the control flow graph is explicit in the IR. * SSA is natively supported, and SSA uses point directly to the SSA definition, which means that the use-def chains are always there, and def-use chains are kept by tracking the set of all uses for each definition. * It's written in C. (see the README in patch 3 and nir.h in patch 4 for more details) Some things that are missing or could be improved: * There's currently no alias tracking for inputs, outputs, and uniforms. This is especially important for uniforms because we don't pack them like we pack inputs and outputs. * We need a way to represent matrix multiplies so that we can do
Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa
On Mon, Aug 18, 2014 at 8:44 AM, Roland Scheidegger srol...@vmware.com wrote: Am 16.08.2014 02:12, schrieb Connor Abbott: I know what you might be thinking right now. Wait, *another* IR? Don't we already have like 5 of those, not counting all the driver-specific ones? Isn't this stuff complicated enough already? Well, there are some pretty good reasons to start afresh (again...). In the years we've been using GLSL IR, we've come to realize that, in fact, it's not what we want *at all* to do optimizations on. Ian has done a talk at FOSDEM that highlights some of the problems they've run into: https://urldefense.proofpoint.com/v1/url?u=https://video.fosdem.org/2014/H1301_Cornil/Saturday/Three_Years_Experience_with_a_Treelike_Shader_IR.webmk=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0Ar=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0Am=iXhCeAYmidPDc1lFo757Cc9V0PvWAN4n3X%2Fw%2B%2F7Lx%2Fs%3D%0As=f103fb26bf53eee64318a490517d1ee9ab88ecd29fcdbe49d54b5a27e7581c2e But here's the summary: * GLSL IR is way too much of a memory hog, since it has to make a new variable for each temporary the compiler creates and then each time you want to dereference that temporary you need to create an ir_dereference_variable that points to it which is also very cache-unfriendly (downright cache-mean!). * The expression trees were originally added so that we could do pattern matching to automatically optimize things, but this turned out to be both very difficult to do and not very helpful. Instead, all it does is add more complexity to the IR without much benefit - with SSA or having proper use-def chains, we could get back what the trees give us while also being able to do lots more optimizations. * We don't have the concept of basic blocks in GLSL IR, which makes a lot of optimizations harder because they were originally designed with basic blocks in mind - take, for example, my SSA series. 
I had to map a whole lot of concepts that were based on the control flow graph to this tree of statements that GLSL IR uses, and the end result wound up looking nothing at all like the original paper. This problem gets even worse for things like e.g. Global Code Motion that depend upon having the dominance tree. I originally wanted to modify GLSL IR to fix these problems by adding new instruction types that would address these issues and then converting back and forth between the old and the new form, but I realized that fixing all the problems would basically mean a complete rewrite - and if that's the case, then why don't we start from scratch? So I took Ken's suggestions and started designing, and then at Intel over the summer started implementing, a completely new IR which I call NIR that's at a lower level than GLSL IR, but still high-level enough to be mostly device-independant (different drivers may have different passes and different ways of lowering e.g. matrix multiplies) so that we can do generic optimizations on it. Having support for SSA from the beginning was also a must, because lots of optimisations that we really want for cleaning up DX9-translated games are either a lot easier in or made possible by SSA. I also made the decision for it to be typeless, because that's what the cool kids are all doing :) and for a lower-level, flat IR it seemed like the thing to do (it could have gone either way, though). So the key design points of NIR (pronounced either like near as in NIR is near! 
or to rhyme with burr) are: * It's flat (no expression trees) * It's typeless * Modifiers (abs, negate, saturate), swizzles, and write masks are part of ALU instructions * It includes enough GLSL-like things (variables that you can load from or store to, function calls) to be hardware-agnostic (although we don't have a way to represent matrix multiplies right now, but that could easily be added) to be able to do optimizations at a high level, while having lowering passes that convert variables to registers and input/output/uniform loads/stores that will open up more opportunities for optimization and save memory while being more hardware-specific. * Control flow consists of a tree of if statements and loops, like in GLSL IR, except the leaves of the tree are now basic blocks instead of instructions. Also, each basic block keeps track of its successors and predecessors, so the control flow graph is explicit in the IR. * SSA is natively supported, and SSA uses point directly to the SSA definition, which means that the use-def chains are always there, and def-use chains are kept by tracking the set of all uses for each definition. * It's written in C. (see the README in patch 3 and nir.h in patch 4 for more details) Some things that are missing or could be improved: * There's currently no alias tracking for inputs, outputs, and uniforms. This is especially important for uniforms because we don't pack them like we pack inputs and outputs. * We need a way to represent
Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa
On Mon, Aug 18, 2014 at 11:47 AM, Jose Fonseca jfons...@vmware.com wrote: On 18/08/14 14:21, Marek Olšák wrote: On Mon, Aug 18, 2014 at 2:44 PM, Roland Scheidegger srol...@vmware.com wrote: Am 16.08.2014 02:12, schrieb Connor Abbott: I know what you might be thinking right now. Wait, *another* IR? Don't we already have like 5 of those, not counting all the driver-specific ones? Isn't this stuff complicated enough already? Well, there are some pretty good reasons to start afresh (again...). In the years we've been using GLSL IR, we've come to realize that, in fact, it's not what we want *at all* to do optimizations on. Ian has done a talk at FOSDEM that highlights some of the problems they've run into: https://urldefense.proofpoint.com/v1/url?u=https://video.fosdem.org/2014/H1301_Cornil/Saturday/Three_Years_Experience_with_a_Treelike_Shader_IR.webmk=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0Ar=F4msKE2WxRzA%2BwN%2B25muztFm5TSPwE8HKJfWfR2NgfY%3D%0Am=iXhCeAYmidPDc1lFo757Cc9V0PvWAN4n3X%2Fw%2B%2F7Lx%2Fs%3D%0As=f103fb26bf53eee64318a490517d1ee9ab88ecd29fcdbe49d54b5a27e7581c2e But here's the summary: * GLSL IR is way too much of a memory hog, since it has to make a new variable for each temporary the compiler creates and then each time you want to dereference that temporary you need to create an ir_dereference_variable that points to it which is also very cache-unfriendly (downright cache-mean!). * The expression trees were originally added so that we could do pattern matching to automatically optimize things, but this turned out to be both very difficult to do and not very helpful. Instead, all it does is add more complexity to the IR without much benefit - with SSA or having proper use-def chains, we could get back what the trees give us while also being able to do lots more optimizations. 
* We don't have the concept of basic blocks in GLSL IR, which makes a lot of optimizations harder because they were originally designed with basic blocks in mind - take, for example, my SSA series. I had to map a whole lot of concepts that were based on the control flow graph to this tree of statements that GLSL IR uses, and the end result wound up looking nothing at all like the original paper. This problem gets even worse for things like e.g. Global Code Motion that depend upon having the dominance tree. I originally wanted to modify GLSL IR to fix these problems by adding new instruction types that would address these issues and then converting back and forth between the old and the new form, but I realized that fixing all the problems would basically mean a complete rewrite - and if that's the case, then why don't we start from scratch? So I took Ken's suggestions and started designing, and then at Intel over the summer started implementing, a completely new IR which I call NIR that's at a lower level than GLSL IR, but still high-level enough to be mostly device-independant (different drivers may have different passes and different ways of lowering e.g. matrix multiplies) so that we can do generic optimizations on it. Having support for SSA from the beginning was also a must, because lots of optimisations that we really want for cleaning up DX9-translated games are either a lot easier in or made possible by SSA. I also made the decision for it to be typeless, because that's what the cool kids are all doing :) and for a lower-level, flat IR it seemed like the thing to do (it could have gone either way, though). So the key design points of NIR (pronounced either like near as in NIR is near! 
or to rhyme with burr) are: * It's flat (no expression trees) * It's typeless * Modifiers (abs, negate, saturate), swizzles, and write masks are part of ALU instructions * It includes enough GLSL-like things (variables that you can load from or store to, function calls) to be hardware-agnostic (although we don't have a way to represent matrix multiplies right now, but that could easily be added) to be able to do optimizations at a high level, while having lowering passes that convert variables to registers and input/output/uniform loads/stores that will open up more opportunities for optimization and save memory while being more hardware-specific. * Control flow consists of a tree of if statements and loops, like in GLSL IR, except the leaves of the tree are now basic blocks instead of instructions. Also, each basic block keeps track of its successors and predecessors, so the control flow graph is explicit in the IR. * SSA is natively supported, and SSA uses point directly to the SSA definition, which means that the use-def chains are always there, and def-use chains are kept by tracking the set of all uses for each definition. * It's written in C. (see the README in patch 3 and nir.h in patch 4 for more details) Some things that are missing or could be improved: * There's currently no alias tracking for inputs, outputs, and uniforms. This is especially
Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa
On Mon, Aug 18, 2014 at 4:32 AM, Michel Dänzer mic...@daenzer.net wrote: On 16.08.2014 09:12, Connor Abbott wrote: I know what you might be thinking right now. Wait, *another* IR? Don't we already have like 5 of those, not counting all the driver-specific ones? Isn't this stuff complicated enough already? Well, there are some pretty good reasons to start afresh (again...). In the years we've been using GLSL IR, we've come to realize that, in fact, it's not what we want *at all* to do optimizations on. Did you evaluate using LLVM IR instead of inventing yet another one? -- Earthling Michel Dänzer| http://www.amd.com Libre software enthusiast |Mesa and X developer Yes. See http://lists.freedesktop.org/archives/mesa-dev/2014-February/053502.html and http://lists.freedesktop.org/archives/mesa-dev/2014-February/053522.html ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa
On Mon, Aug 18, 2014 at 12:25 PM, Connor Abbott cwabbo...@gmail.com wrote: On Mon, Aug 18, 2014 at 11:47 AM, Jose Fonseca jfons...@vmware.com wrote: On 18/08/14 14:21, Marek Olšák wrote: Once these are in place, all development effort to go on to improving/leveraging the new IR. We could deprecate TGSI when it would have few users. Also, switching to LLVM, NIR, or some other IR that uses SSA (or at least modifying TGSI to support it) seems like something that's really necessary for the Gallium folks. Soon, considering most backends already use SSA in one form or another, the situation will look like: GLSL IR - NIR - NIR with SSA - optimizations - NIR without SSA - TGSI - backend without SSA - backend with SSA So backends would have to duplicate the into-SSA logic and every shader would have to pay the penalty of being converted out of and then back into SSA thanks to TGSI not supporting it. Looking at it another way, perhaps we should just accept that backends will want to do their own things, and try to minimize the damage by doing GLSL IR - transport ir - backend Are you envisioning a world where every backend uses NIR, and uses some sort of shared register allocation/spilling/etc logic, configurable instruction lists, pluggable with lowering passes? By then you've invented LLVM... -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 0/6] Implement ARB_conditional_render_inverted
On Sun, Aug 17, 2014 at 7:38 PM, Tobias Klausmann tobias.johannes.klausm...@mni.thm.de wrote: This patch series adds support for ARB_conditional_render_inverted to nvc0, softpipe and llvmpipe. V2: - Add missing _mesa_BeginConditionalRender() parts to series - Fix nvc0 blit and inverted rendering - Fix relnotes - Enable for softpipe and llvmpipe - Rebase on top of current HEAD V3: - Only allow the new modes if the extension is enabled - Merge several patches to always have a working tree Tobias Klausmann (6): mesa: add ARB_conditional_render_inverted flags mesa/st: Support ARB_conditional_render_inverted modes gallium: Add and handle PIPE_CAP_CONDITIONAL_RENDER_INVERTED The order of these two patches needs to be swapped, the mesa/st change depends on the gallium change adding the cap... nvc0: Handle ARB_conditional_render_inverted and enable it llvmpipe/softpipe: enable ARB_conditional_render_inverted docs: Update status of ARB_conditional_render_inverted docs/GL3.txt | 2 +- docs/relnotes/10.3.html | 3 ++- src/gallium/docs/source/screen.rst | 2 ++ src/gallium/drivers/freedreno/freedreno_screen.c | 1 + src/gallium/drivers/i915/i915_screen.c | 1 + src/gallium/drivers/ilo/ilo_screen.c | 1 + src/gallium/drivers/llvmpipe/lp_screen.c | 2 ++ src/gallium/drivers/nouveau/nv30/nv30_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_query.c| 5 ++--- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 4 +++- src/gallium/drivers/r300/r300_screen.c | 1 + src/gallium/drivers/r600/r600_pipe.c | 1 + src/gallium/drivers/radeonsi/si_pipe.c | 1 + src/gallium/drivers/softpipe/sp_screen.c | 2 ++ src/gallium/drivers/svga/svga_screen.c | 1 + src/gallium/drivers/vc4/vc4_screen.c | 1 + src/gallium/include/pipe/p_defines.h | 1 + src/mesa/main/condrender.c | 10 -- src/mesa/main/extensions.c | 1 + src/mesa/main/mtypes.h | 1 + src/mesa/state_tracker/st_cb_condrender.c| 20 +++- 
src/mesa/state_tracker/st_extensions.c | 1 + 24 files changed, 56 insertions(+), 9 deletions(-) -- 1.8.4.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 4/6] nvc0: Handle ARB_conditional_render_inverted and enable it
On Sun, Aug 17, 2014 at 7:38 PM, Tobias Klausmann tobias.johannes.klausm...@mni.thm.de wrote: Signed-off-by: Tobias Klausmann tobias.johannes.klausm...@mni.thm.de --- src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 5 ++--- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 3 +-- src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 4 +++- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index 50cef1e..71d48f2 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -542,7 +542,6 @@ nvc0_render_condition(struct pipe_context *pipe, struct nouveau_pushbuf *push = nvc0-base.pushbuf; struct nvc0_query *q; uint32_t cond; - boolean negated = FALSE; boolean wait = mode != PIPE_RENDER_COND_NO_WAIT mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT; @@ -561,13 +560,13 @@ nvc0_render_condition(struct pipe_context *pipe, /* NOTE: comparison of 2 queries only works if both have completed */ switch (q-type) { case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - cond = negated ? NVC0_3D_COND_MODE_EQUAL : + cond = condition ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_NOT_EQUAL; wait = TRUE; break; case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: - if (likely(!negated)) { + if (likely(!condition)) { if (unlikely(q-nesting)) cond = wait ? 
NVC0_3D_COND_MODE_NOT_EQUAL : NVC0_3D_COND_MODE_ALWAYS; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 7c2f11a..84025ef 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -167,13 +167,12 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d = NVE4_3D_CLASS) ? 1 : 0; case PIPE_CAP_COMPUTE: return (class_3d == NVE4_3D_CLASS) ? 1 : 0; - case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: - return 0; /* unsupported caps */ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index a29f0cc..622193b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -1210,6 +1210,8 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) int64_t du_dx, dv_dy; int i; uint32_t mode; + uint32_t cond = nvc0-cond_cond ? NVC0_2D_COND_MODE_EQUAL : + NVC0_2D_COND_MODE_NOT_EQUAL; uint32_t mask = nv50_blit_eng2d_get_mask(info); boolean b; @@ -1236,7 +1238,7 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) } if (nvc0-cond_query info-render_condition_enable) - IMMED_NVC0(push, NVC0_2D(COND_MODE), NVC0_2D_COND_MODE_RES_NON_ZERO); + IMMED_NVC0(push, NVC0_2D(COND_MODE), cond); This used to always get set to NVC0_2D_COND_MODE_RES_NON_ZERO. Now it will never be set to that. I think you need to copy the cond selection logic from nvc0_query a little more faithfully... 
if (mask != 0x) { IMMED_NVC0(push, NVC0_2D(ROP), 0xca); /* DPSDxax */ -- 1.8.4.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa
On Mon, Aug 18, 2014 at 12:38 PM, Ilia Mirkin imir...@alum.mit.edu wrote: On Mon, Aug 18, 2014 at 12:25 PM, Connor Abbott cwabbo...@gmail.com wrote: On Mon, Aug 18, 2014 at 11:47 AM, Jose Fonseca jfons...@vmware.com wrote: On 18/08/14 14:21, Marek Olšák wrote: Once these are in place, all development effort to go on to improving/leveraging the new IR. We could deprecate TGSI when it would have few users. Also, switching to LLVM, NIR, or some other IR that uses SSA (or at least modifying TGSI to support it) seems like something that's really necessary for the Gallium folks. Soon, considering most backends already use SSA in one form or another, the situation will look like: GLSL IR - NIR - NIR with SSA - optimizations - NIR without SSA - TGSI - backend without SSA - backend with SSA So backends would have to duplicate the into-SSA logic and every shader would have to pay the penalty of being converted out of and then back into SSA thanks to TGSI not supporting it. Looking at it another way, perhaps we should just accept that backends will want to do their own things, and try to minimize the damage by doing GLSL IR - transport ir - backend Are you envisioning a world where every backend uses NIR, and uses some sort of shared register allocation/spilling/etc logic, configurable instruction lists, pluggable with lowering passes? By then you've invented LLVM... -ilia No, I expect that backends will still want to do their own register allocation/spilling/scheduling etc. - and besides for that, NIR supports structured control flow, swizzles and writemasks, modifiers (abs, negate, saturate), etc. natively in the IR instead of something that's tacked on or something that drivers have to do themselves. So no, I'm not re-inventing LLVM. On the other hand, it's entirely possible for backends to add their own backend-specific opcodes and intrinsics, and thereby be able to do some amount of lowering and optimization in NIR. 
Another reason that backends might want to accept NIR is so that they can give NIR passes more precise information on e.g. when to do if-conversion. Again, this is all speculative though - we'll have to do more of the work before we can find out how we want to use NIR beyond what I originally wrote it to be, which was a way to do common optimizations that we couldn't do in GLSL IR. Connor ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 7/8] i965/fs: Optimize gl_FrontFacing calculation on Gen6+.
On Fri, Aug 15, 2014 at 4:43 PM, Matt Turner matts...@gmail.com wrote: On Fri, Aug 15, 2014 at 3:26 PM, Anuj Phogat anuj.pho...@gmail.com wrote: With comment on patch 3 addressed: Patches 1-7 are: Acked-by: Anuj Phogat anuj.pho...@gmail.com Thanks for looking over the patches! Acked-by is used in the kernel by a maintainer to acknowledge the changes to his particular subsystem in a patch that affects many subsystems, or for maintainers to say "yeah, looks good to me". We use it in Mesa to say "yeah, the idea seems good" without really saying that we've reviewed the contents of the patch itself. The kernel docs describe what a Reviewed-by really means here [1], but basically it's that (1) I read the patch, (2) I'm satisfied with the patch, (3) I think the patch is worthwhile, and (4) I'm not making any guarantees. :-) Can I upgrade your Acked-bys to Reviewed-bys? Yes, go ahead. [1] http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches#n498 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa
On Mon, Aug 18, 2014 at 7:05 PM, Connor Abbott cwabbo...@gmail.com wrote: On Mon, Aug 18, 2014 at 12:38 PM, Ilia Mirkin imir...@alum.mit.edu wrote: Looking at it another way, perhaps we should just accept that backends will want to do their own things, and try to minimize the damage by doing GLSL IR - transport ir - backend Are you envisioning a world where every backend uses NIR, and uses some sort of shared register allocation/spilling/etc logic, configurable instruction lists, pluggable with lowering passes? By then you've invented LLVM... -ilia No, I expect that backends will still want to do their own register allocation/spilling/scheduling etc. - and besides for that, NIR supports structured control flow, swizzles and writemasks, modifiers (abs, negate, saturate), etc. natively in the IR instead of something that's tacked on or something that drivers have to do themselves. So no, I'm not re-inventing LLVM. On the other hand, it's entirely possible for backends to add their own backend-specific opcodes and intrinsics, and thereby be able to do some amount of lowering and optimization in NIR. Another reason that backends might want to accept NIR is so that they can give NIR passes more precise information on e.g. when to do if-conversion. Again, this is all speculative though - we'll have to do more of the work before we can find out how we want to use NIR beyond what originally wrote it to be, which was a way to do common optimizations that we couldn't do in GLSL IR. This sounds good. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa
Am 18.08.2014 19:05, schrieb Connor Abbott: On Mon, Aug 18, 2014 at 12:38 PM, Ilia Mirkin imir...@alum.mit.edu wrote: On Mon, Aug 18, 2014 at 12:25 PM, Connor Abbott cwabbo...@gmail.com wrote: On Mon, Aug 18, 2014 at 11:47 AM, Jose Fonseca jfons...@vmware.com wrote: On 18/08/14 14:21, Marek Olšák wrote: Once these are in place, all development effort to go on to improving/leveraging the new IR. We could deprecate TGSI when it would have few users. Also, switching to LLVM, NIR, or some other IR that uses SSA (or at least modifying TGSI to support it) seems like something that's really necessary for the Gallium folks. Soon, considering most backends already use SSA in one form or another, the situation will look like: GLSL IR - NIR - NIR with SSA - optimizations - NIR without SSA - TGSI - backend without SSA - backend with SSA So backends would have to duplicate the into-SSA logic and every shader would have to pay the penalty of being converted out of and then back into SSA thanks to TGSI not supporting it. Looking at it another way, perhaps we should just accept that backends will want to do their own things, and try to minimize the damage by doing GLSL IR - transport ir - backend Are you envisioning a world where every backend uses NIR, and uses some sort of shared register allocation/spilling/etc logic, configurable instruction lists, pluggable with lowering passes? By then you've invented LLVM... -ilia No, I expect that backends will still want to do their own register allocation/spilling/scheduling etc. - and besides for that, NIR supports structured control flow, swizzles and writemasks, modifiers (abs, negate, saturate), etc. natively in the IR instead of something that's tacked on or something that drivers have to do themselves. So no, I'm not re-inventing LLVM. On the other hand, it's entirely possible for backends to add their own backend-specific opcodes and intrinsics, and thereby be able to do some amount of lowering and optimization in NIR. 
Another reason that backends might want to accept NIR is so that they can give NIR passes more precise information on e.g. when to do if-conversion. Again, this is all speculative though - we'll have to do more of the work before we can find out how we want to use NIR beyond what originally wrote it to be, which was a way to do common optimizations that we couldn't do in GLSL IR. Connor I guess having the typical gpu features (vec4 representation along with swizzles, writemasks, modifiers) in the IR is nice, though I'm beginning to wonder if it's all that useful. Obviously, it maps really well to old gpus (like r300) and old-style shaders using lots of vec4 (the human-readable assembly is going to be much nicer if you have vec4 support) but ultimately it seems most newer archs are scalar (or rather, their vectors are not along the instruction level axis). Something like Mali gpus being an exception rather than the norm. Even things like r600 need to do their own vliw-ication anyway. In any case, for gallium I'm pretty indifferent to shader IR actually, as long as things keep working... Just keep in mind though while tgsi might not be the optimal solution, there's significant precedent for this kind of low level shader language (since even d3d10 follows that model, though I can't tell how happy the IHVs are with it...). But if NIR benefits the glsl compiler on its own that looks all good to me, it's just an area I'm not really familiar with. btw do you have some example of how a shader looks printed out? I'm too lazy to play with it myself... Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 18/20] i965/fs: Preserve CFG in the SEL peephole.
On Mon, Aug 18, 2014 at 8:34 AM, Pohjolainen, Topi topi.pohjolai...@intel.com wrote: On Thu, Jul 24, 2014 at 07:54:25PM -0700, Matt Turner wrote: --- src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp index d64cd98..f609138 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp @@ -212,23 +212,26 @@ fs_visitor::opt_peephole_sel() if (brw-gen == 6 if_inst-conditional_mod) { fs_inst *cmp_inst = CMP(reg_null_d, if_inst-src[0], if_inst-src[1], if_inst-conditional_mod); - if_inst-insert_before(cmp_inst); + if_inst-insert_before(block, cmp_inst); } + bblock_t *then_block = (bblock_t *)block-link.next; + bblock_t *else_block = (bblock_t *)block-else_block-link.next; Isn't this a pointer to the endif-block? I thought else-block would be bblock_t *else_block = (bblock_t *)block-then_block-link.next; or simply just bblock_t *else_block = (bblock_t *)block-else_block; It's the block immediately following the ELSE instruction (containing the MOVs). E.g., B0: ... IF B1: MOV MOV ELSE B2: MOV MOV B3: ENDIF ... then_block is B1, and else_block is B2. I can name them something else if that would make it clearer. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/7] i965/gen8: Add 3-src instruction compaction tables.
--- src/mesa/drivers/dri/i965/brw_eu_compact.c | 27 +++ 1 file changed, 27 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index dc0060d..1f30366 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -461,6 +461,33 @@ static const uint16_t gen8_src_index_table[32] = { 0b010110001000 }; +/* This is actually the control index table for Cherryview (26 bits), but the + * only difference from Broadwell (24 bits) is that it has two extra 0-bits at + * the start. + * + * The low 24 bits have the same mappings on both hardware. + */ +static const uint32_t gen8_3src_control_index_table[4] = { + 0b001111, + 0b000111, + 0b001001, + 0b001011 +}; + +/* This is actually the control index table for Cherryview (49 bits), but the + * only difference from Broadwell (46 bits) is that it has three extra 0-bits + * at the start. + * + * The low 44 bits have the same mappings on both hardware, and since the high + * three bits on Broadwell are zero, we can reuse Cherryview's table. + */ +static const uint64_t gen8_3src_source_index_table[4] = { + 0b0011100100111001001110010, + 0b00111001001110010011100100010, + 0b00111001001110010011100101000, + 0b00111001001110010011100100010 +}; + static const uint32_t *control_index_table; static const uint32_t *datatype_table; static const uint16_t *subreg_table; -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/7] i965/gen8: Add instruction compaction tables.
--- src/mesa/drivers/dri/i965/brw_eu_compact.c | 150 + 1 file changed, 150 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index f100297..dc0060d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -321,6 +321,146 @@ static const uint16_t gen7_src_index_table[32] = { 0b010110001000 }; +static const uint32_t gen8_control_index_table[32] = { + 0b010, + 0b100, + 0b101, + 0b110, + 0b111, + 0b1000100, + 0b1000101, + 0b1000111, + 0b1001000, + 0b1001001, + 0b1001101, + 0b110, + 0b111, + 0b1100010, + 0b1100011, + 0b1100100, + 0b1100101, + 0b1100111, + 0b1101001, + 0b1101101, + 0b111, + 0b111, + 0b0001000, + 0b0001010, + 0b0001100, + 0b0001001, + 0b0010110, + 0b0010111, + 0b0011000, + 0b0011001, + 0b0101000, + 0b0101001 +}; + +static const uint32_t gen8_datatype_table[32] = { + 0b00101, + 0b001000100, + 0b001000101, + 0b001001101, + 0b0010101011101, + 0b00100010111011101, + 0b0010001110101, + 0b00100011101000101, + 0b00100011101011101, + 0b001010101, + 0b001110100, + 0b001110101, + 0b001000101000101000101, + 0b001000111000101000100, + 0b001000111000101000101, + 0b001011100011101011101, + 0b001011101011100011101, + 0b001011101011101011100, + 0b001011101011101011101, + 0b00101011101011100, + 0b001001100, + 0b0010001011101, + 0b0010101000101, + 0b001010100, + 0b001000101000101000100, + 0b00100011100010100, + 0b00100100100101001, + 0b001010111011101011101, + 0b00101011101011101, + 0b00100001101001100, + 0b001001001001001001000, + 0b001001011001001001000 +}; + +static const uint16_t gen8_subreg_table[32] = { + 0b000, + 0b001, + 0b0001000, + 0b000, + 0b001, + 0b0001000, + 0b001, + 0b0011000, + 0b010, + 0b011, + 0b0101000, + 0b001, + 0b0010001, + 0b0011001, + 0b0011010, + 0b0011011, + 0b0011100, + 0b0011111, + 0b00110001000, + 0b00110001110, + 0b0011000, + 0b00100011000, + 0b00100001000, + 0b010, + 0b0111000, + 0b011, + 0b0001111, + 0b100, + 0b101, + 0b110, + 
0b111, + 0b11100011100 +}; + +static const uint16_t gen8_src_index_table[32] = { + 0b, + 0b0010, + 0b0001, + 0b00010010, + 0b00011000, + 0b0010, + 0b00101000, + 0b01001000, + 0b0101, + 0b0111, + 0b0000, + 0b0011, + 0b00110010, + 0b00111000, + 0b00110001, + 0b001100010010, + 0b00110010, + 0b001100101000, + 0b001100111000, + 0b00110100, + 0b00110110, + 0b001101001000, + 0b00110101, + 0b00110110, + 0b001101101000, + 0b00110111, + 0b001101110001, + 0b00110000, + 0b010001101000, + 0b010001101001, + 0b010001101010, + 0b010110001000 +}; + static const uint32_t *control_index_table; static const uint32_t *datatype_table; static const uint16_t *subreg_table; @@ -679,8 +819,18 @@ brw_init_compaction_tables(struct brw_context *brw) assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0); assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0); assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0); + assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0); + assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0); + assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0); + assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0); switch (brw-gen) { + case 8: + control_index_table = gen8_control_index_table; + datatype_table = gen8_datatype_table; + subreg_table = gen8_subreg_table; + src_index_table = gen8_src_index_table; + break; case 7: control_index_table = gen7_control_index_table; datatype_table = gen7_datatype_table; -- 1.8.5.5 ___ mesa-dev
[Mesa-dev] [PATCH 7/7] i965: Enable instruction compaction on Gen8+.
--- src/mesa/drivers/dri/i965/brw_eu_compact.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 727fef5..280d7f7 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -1101,7 +1101,7 @@ brw_compact_instructions(struct brw_compile *p, int start_offset, */ int old_ip[(p-next_insn_offset - start_offset) / 8]; - if (brw-gen 6 || brw-gen = 8) + if (brw-gen 6 || brw-gen 8) return; int src_offset; -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/7] i965: Broadwell/Cherryview instruction compaction
Available from git://people.freedesktop.org/~mattst88/mesa gen8compact (The branch is based on master before krh's fast clear series) Cherryview's instruction compaction is slightly different, and is as yet untested. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/7] i965: Reverse condition ordering to let us support other gens.
--- src/mesa/drivers/dri/i965/brw_eu_compact.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 625cfbb..25a96e7 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -789,14 +789,14 @@ brw_compact_instructions(struct brw_compile *p, int start_offset, case BRW_OPCODE_ELSE: case BRW_OPCODE_ENDIF: case BRW_OPCODE_WHILE: - if (brw-gen == 6) { + if (brw-gen = 7) { +update_uip_jip(brw, insn, this_old_ip, compacted_counts); + } else if (brw-gen == 6) { int gen6_jump_count = brw_inst_gen6_jump_count(brw, insn); target_old_ip = this_old_ip + gen6_jump_count; target_compacted_count = compacted_counts[target_old_ip]; gen6_jump_count -= (target_compacted_count - this_compacted_count); brw_inst_set_gen6_jump_count(brw, insn, gen6_jump_count); - } else { -update_uip_jip(brw, insn, this_old_ip, compacted_counts); } break; } -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/7] i965: Add support for compacting 3-src instructions on Gen8.
--- src/mesa/drivers/dri/i965/brw_eu_compact.c | 189 + 1 file changed, 189 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 07faff4..727fef5 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -611,6 +611,97 @@ set_src1_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src, return true; } +static bool +set_3src_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src) +{ + assert(brw-gen = 8); + + uint32_t uncompacted = /* 24b/BDW; 26b/CHV */ + (brw_inst_bits(src, 34, 32) 21) | /* 3b */ + (brw_inst_bits(src, 28, 8));/* 21b */ + + if (brw-is_cherryview) + uncompacted |= brw_inst_bits(src, 36, 35) 24; /* 2b */ + + for (int i = 0; i 4; i++) { + if (gen8_3src_control_index_table[i] == uncompacted) { + brw_compact_inst_set_3src_control_index(dst, i); +return true; + } + } + + return false; +} + +static bool +set_3src_source_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src) +{ + assert(brw-gen = 8); + + uint64_t uncompacted =/* 46b/BDW; 49b/CHV */ + (brw_inst_bits(src, 83, 83) 43) | /* 1b */ + (brw_inst_bits(src, 114, 107) 35) | /* 8b */ + (brw_inst_bits(src, 93, 86) 27) | /* 8b */ + (brw_inst_bits(src, 72, 65) 19) | /* 8b */ + (brw_inst_bits(src, 55, 37));/* 19b */ + + if (brw-is_cherryview) { + uncompacted |= + (brw_inst_bits(src, 126, 125) 47) | /* 2b */ + (brw_inst_bits(src, 105, 104) 45) | /* 2b */ + (brw_inst_bits(src, 84, 84) 44); /* 1b */ + } else { + uncompacted |= + (brw_inst_bits(src, 125, 125) 45) | /* 1b */ + (brw_inst_bits(src, 104, 104) 44); /* 1b */ + } + + for (int i = 0; i 4; i++) { + if (gen8_3src_source_index_table[i] == uncompacted) { + brw_compact_inst_set_3src_source_index(dst, i); +return true; + } + } + + return false; +} + +static bool +brw_try_compact_3src_instruction(struct brw_context *brw, brw_compact_inst *dst, + brw_inst *src) +{ + assert(brw-gen = 8); + +#define 
compact(field) \ + brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(brw, src)) + + compact(opcode); + + if (!set_3src_control_index(brw, dst, src)) + return false; + + if (!set_3src_source_index(brw, dst, src)) + return false; + + compact(dst_reg_nr); + compact(src0_rep_ctrl); + brw_compact_inst_set_3src_cmpt_control(dst, true); + compact(debug_control); + compact(saturate); + compact(src1_rep_ctrl); + compact(src2_rep_ctrl); + compact(src0_reg_nr); + compact(src1_reg_nr); + compact(src2_reg_nr); + compact(src0_subreg_nr); + compact(src1_subreg_nr); + compact(src2_subreg_nr); + +#undef compact + + return true; +} + /* Compacted instructions have 12-bits for immediate sources, and a 13th bit * that's replicated through the high 20 bits. * @@ -627,6 +718,17 @@ is_compactable_immediate(unsigned imm) return imm == 0 || imm == 0xf000; } +/* Returns whether an opcode takes three sources. */ +static bool +is_3src(uint32_t op) +{ + return op == BRW_OPCODE_CSEL || + op == BRW_OPCODE_BFE || + op == BRW_OPCODE_BFI2 || + op == BRW_OPCODE_MAD || + op == BRW_OPCODE_LRP; +} + /** * Tries to compact instruction src into dst. 
* @@ -651,6 +753,16 @@ brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst, return false; } + if (brw-gen = 8 is_3src(brw_inst_opcode(brw, src))) { + memset(temp, 0, sizeof(temp)); + if (brw_try_compact_3src_instruction(brw, temp, src)) { + *dst = temp; + return true; + } else { + return false; + } + } + bool is_immediate = brw_inst_src0_reg_file(brw, src) == BRW_IMMEDIATE_VALUE || brw_inst_src1_reg_file(brw, src) == BRW_IMMEDIATE_VALUE; @@ -767,12 +879,89 @@ set_uncompacted_src1(struct brw_context *brw, brw_inst *dst, } } +static void +set_uncompacted_3src_control_index(struct brw_context *brw, brw_inst *dst, + brw_compact_inst *src) +{ + assert(brw-gen = 8); + + uint32_t compacted = brw_compact_inst_3src_control_index(src); + uint32_t uncompacted = gen8_3src_control_index_table[compacted]; + + brw_inst_set_bits(dst, 34, 32, (uncompacted 21) 0x7); + brw_inst_set_bits(dst, 28, 8, (uncompacted 0) 0x1f); + + if (brw-is_cherryview) + brw_inst_set_bits(dst, 36, 35, (uncompacted 24)); +} + +static void +set_uncompacted_3src_source_index(struct brw_context *brw, brw_inst *dst, + brw_compact_inst *src) +{ + assert(brw-gen = 8); + + uint32_t compacted = brw_compact_inst_3src_source_index(src); +
[Mesa-dev] [PATCH 5/7] i965: Add support for compacting 1- and 2-src instructions on Gen8.
--- src/mesa/drivers/dri/i965/brw_eu_compact.c | 48 ++ 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 1f30366..07faff4 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -496,14 +496,19 @@ static const uint16_t *src_index_table; static bool set_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src) { - uint32_t uncompacted = /* 17b/SNB; 19b/IVB+ */ - (brw_inst_bits(src, 31, 31) 16) | /* 1b */ - (brw_inst_bits(src, 23, 8));/* 16b */ + uint32_t uncompacted = brw-gen = 8 /* 17b/SNB; 19b/IVB+ */ + ? (brw_inst_bits(src, 33, 31) 16) | /* 3b */ +(brw_inst_bits(src, 23, 12) 4) | /* 12b */ +(brw_inst_bits(src, 10, 9) 2) | /* 2b */ +(brw_inst_bits(src, 34, 34) 1) | /* 1b */ +(brw_inst_bits(src, 8, 8)) /* 1b */ + : (brw_inst_bits(src, 31, 31) 16) | /* 1b */ +(brw_inst_bits(src, 23, 8));/* 16b */ /* On gen7, the flag register and subregister numbers are integrated into * the control index. */ - if (brw-gen = 7) + if (brw-gen == 7) uncompacted |= brw_inst_bits(src, 90, 89) 17; /* 2b */ for (int i = 0; i 32; i++) { @@ -520,9 +525,12 @@ static bool set_datatype_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src) { - uint32_t uncompacted = /* 18b */ - (brw_inst_bits(src, 63, 61) 15) | /* 3b */ - (brw_inst_bits(src, 46, 32));/* 15b */ + uint32_t uncompacted = brw-gen = 8 /* 18b/SNB+; 21b/BDW+ */ + ? 
(brw_inst_bits(src, 63, 61) 18) | /* 3b */ +(brw_inst_bits(src, 94, 89) 12) | /* 6b */ +(brw_inst_bits(src, 46, 35)) /* 12b */ + : (brw_inst_bits(src, 63, 61) 15) | /* 3b */ +(brw_inst_bits(src, 46, 32));/* 15b */ for (int i = 0; i 32; i++) { if (datatype_table[i] == uncompacted) { @@ -692,11 +700,19 @@ set_uncompacted_control(struct brw_context *brw, brw_inst *dst, uint32_t uncompacted = control_index_table[brw_compact_inst_control_index(src)]; - brw_inst_set_bits(dst, 31, 31, (uncompacted 16) 0x1); - brw_inst_set_bits(dst, 23, 8, (uncompacted 0x)); + if (brw-gen = 8) { + brw_inst_set_bits(dst, 33, 31, (uncompacted 16)); + brw_inst_set_bits(dst, 23, 12, (uncompacted 4) 0xfff); + brw_inst_set_bits(dst, 10, 9, (uncompacted 2) 0x3); + brw_inst_set_bits(dst, 34, 34, (uncompacted 1) 0x1); + brw_inst_set_bits(dst, 8, 8, (uncompacted 0) 0x1); + } else { + brw_inst_set_bits(dst, 31, 31, (uncompacted 16) 0x1); + brw_inst_set_bits(dst, 23, 8, (uncompacted 0x)); - if (brw-gen = 7) - brw_inst_set_bits(dst, 90, 89, uncompacted 17); + if (brw-gen == 7) + brw_inst_set_bits(dst, 90, 89, uncompacted 17); + } } static void @@ -705,8 +721,14 @@ set_uncompacted_datatype(struct brw_context *brw, brw_inst *dst, { uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)]; - brw_inst_set_bits(dst, 63, 61, (uncompacted 15)); - brw_inst_set_bits(dst, 46, 32, (uncompacted 0x7fff)); + if (brw-gen = 8) { + brw_inst_set_bits(dst, 63, 61, (uncompacted 18)); + brw_inst_set_bits(dst, 94, 89, (uncompacted 12) 0x3f); + brw_inst_set_bits(dst, 46, 35, (uncompacted 0) 0xfff); + } else { + brw_inst_set_bits(dst, 63, 61, (uncompacted 15)); + brw_inst_set_bits(dst, 46, 32, (uncompacted 0x7fff)); + } } static void -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/7] i965: Update JIP/UIP compaction code to operate on bytes.
JIP/UIP were previously in units of compacted instructions. On Gen8 they're in units of bytes. --- src/mesa/drivers/dri/i965/brw_eu_compact.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 25a96e7..f100297 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -653,17 +653,19 @@ static void update_uip_jip(struct brw_context *brw, brw_inst *insn, int this_old_ip, int *compacted_counts) { - int jip = brw_inst_jip(brw, insn); + int scale = brw-gen = 8 ? sizeof(brw_compact_inst) : 1; + + int32_t jip = brw_inst_jip(brw, insn) / scale; jip -= compacted_between(this_old_ip, this_old_ip + jip, compacted_counts); - brw_inst_set_jip(brw, insn, jip); + brw_inst_set_jip(brw, insn, jip * scale); if (brw_inst_opcode(brw, insn) == BRW_OPCODE_ENDIF || brw_inst_opcode(brw, insn) == BRW_OPCODE_WHILE) return; - int uip = brw_inst_uip(brw, insn); + int32_t uip = brw_inst_uip(brw, insn) / scale; uip -= compacted_between(this_old_ip, this_old_ip + uip, compacted_counts); - brw_inst_set_uip(brw, insn, uip); + brw_inst_set_uip(brw, insn, uip * scale); } void -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] glcpp: Don't use alternation in the lookahead for empty pragmas.
We've found that there's a buffer overrun bug in flex that's triggered by using alternation in a lookahead pattern. Fortunately, we don't need to match the exact {NEWLINE} expression to detect an empty pragma. It suffices to verify that there are no non-space characters before any newline character. So we can use a simple [\r\n] to get the desired behavior while avoiding the flex bug. Fixes Piglit's 16385-consecutive-chars and 17000-consecutive-chars-identifier tests. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82472 Signed-off-by: Carl Worth cwo...@cworth.org Approach-suggested-by: Kenneth Graunke kenn...@whitecape.org CC: Kenneth Graunke kenn...@whitecape.org --- Thanks for chasing down the fix for this regression of mine, Ken. I am embarrassed that I clearly didn't run piglit enough while testing my original branch. With your fix above, there is some state that's not updated as it should be when returning a NEWLINE token, (such as incrementing yylineno, etc.). I tried to improve things to update all that state, but it proved problematic, (putting the state updates in a common function doesn't work because only the outer lexing function has access to local variables like yylineno). The alternate approach here was your recommendation, of course. src/glsl/glcpp/glcpp-lex.l | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/glsl/glcpp/glcpp-lex.l b/src/glsl/glcpp/glcpp-lex.l index 98d500e..aaef7b8 100644 --- a/src/glsl/glcpp/glcpp-lex.l +++ b/src/glsl/glcpp/glcpp-lex.l @@ -289,8 +289,14 @@ HEXADECIMAL_INTEGER0[xX][0-9a-fA-F]+[uU]? } /* Swallow empty #pragma directives, (to avoid confusing the -* downstream compiler). */ -HASHpragma{HSPACE}*/{NEWLINE} { +* downstream compiler). +* +* Note: We use a simple regular expression for the lookahead +* here. 
Specifically, we cannot use the complete {NEWLINE} expression +* since it uses alternation and we've found that there's a flex bug +* where using alternation in the lookahead portion of a pattern +* triggers a buffer overrun. / +HASHpragma{HSPACE}*/[\r\n] { BEGIN INITIAL; } -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa
Hi Connor! I've been scrolling through your github-repo a bit over the last few weeks, and I have to say, this seems quite promising. I've got some questions that I haven't really been able to answer myself with the quick glimpse I've had over the codebase: Since we're largely making a mathematical graph rewriting simplifier-thingy just as much as a compiler, does the IR as of now have an easy way of storing upper and lower bounds of variables? Also, does it have an easy way to get something like the hierarchical visitor we have in GLSL IR? (A way of doing, say, algebraic optimizations the way we do now?) With these two in place, it would be easy to make a general bounds-checking optimization to eliminate max/min/sin/sign/cos/ etc operations. I believe that we, as of now, do not have such a pass. If this IR lands, I could probably find some time to port some of the optimization-passes from GLSL IR to NIR. Regards, Thomas 2014-08-16 2:12 GMT+02:00 Connor Abbott cwabbo...@gmail.com: I know what you might be thinking right now. Wait, *another* IR? Don't we already have like 5 of those, not counting all the driver-specific ones? Isn't this stuff complicated enough already? Well, there are some pretty good reasons to start afresh (again...). In the years we've been using GLSL IR, we've come to realize that, in fact, it's not what we want *at all* to do optimizations on. Ian has done a talk at FOSDEM that highlights some of the problems they've run into: https://video.fosdem.org/2014/H1301_Cornil/Saturday/Three_Years_Experience_with_a_Treelike_Shader_IR.webm But here's the summary: * GLSL IR is way too much of a memory hog, since it has to make a new variable for each temporary the compiler creates and then each time you want to dereference that temporary you need to create an ir_dereference_variable that points to it which is also very cache-unfriendly (downright cache-mean!).
* The expression trees were originally added so that we could do pattern matching to automatically optimize things, but this turned out to be both very difficult to do and not very helpful. Instead, all it does is add more complexity to the IR without much benefit - with SSA or having proper use-def chains, we could get back what the trees give us while also being able to do lots more optimizations. * We don't have the concept of basic blocks in GLSL IR, which makes a lot of optimizations harder because they were originally designed with basic blocks in mind - take, for example, my SSA series. I had to map a whole lot of concepts that were based on the control flow graph to this tree of statements that GLSL IR uses, and the end result wound up looking nothing at all like the original paper. This problem gets even worse for things like e.g. Global Code Motion that depend upon having the dominance tree. I originally wanted to modify GLSL IR to fix these problems by adding new instruction types that would address these issues and then converting back and forth between the old and the new form, but I realized that fixing all the problems would basically mean a complete rewrite - and if that's the case, then why don't we start from scratch? So I took Ken's suggestions and started designing, and then at Intel over the summer started implementing, a completely new IR which I call NIR that's at a lower level than GLSL IR, but still high-level enough to be mostly device-independent (different drivers may have different passes and different ways of lowering e.g. matrix multiplies) so that we can do generic optimizations on it. Having support for SSA from the beginning was also a must, because lots of optimisations that we really want for cleaning up DX9-translated games are either a lot easier in or made possible by SSA.
I also made the decision for it to be typeless, because that's what the cool kids are all doing :) and for a lower-level, flat IR it seemed like the thing to do (it could have gone either way, though). So the key design points of NIR (pronounced either like near as in NIR is near! or to rhyme with burr) are: * It's flat (no expression trees) * It's typeless * Modifiers (abs, negate, saturate), swizzles, and write masks are part of ALU instructions * It includes enough GLSL-like things (variables that you can load from or store to, function calls) to be hardware-agnostic (although we don't have a way to represent matrix multiplies right now, but that could easily be added) to be able to do optimizations at a high level, while having lowering passes that convert variables to registers and input/output/uniform loads/stores that will open up more opportunities for optimization and save memory while being more hardware-specific. * Control flow consists of a tree of if statements and loops, like in GLSL IR, except the leaves of the tree are now basic blocks instead of instructions. Also, each basic block keeps track of its successors and predecessors, so the
[Mesa-dev] [PATCH 2/2] i965/vec4: Allow reswizzling writemasks when swizzle is single-valued.
total instructions in shared programs: 4288033 - 4266151 (-0.51%) instructions in affected programs: 930915 - 909033 (-2.35%) --- View under git show -w. Really just rearranging code. src/mesa/drivers/dri/i965/brw_vec4.cpp | 60 +++--- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index c1363ca..155016d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -946,24 +946,27 @@ vec4_instruction::can_reswizzle_dst(int dst_writemask, return false; switch (opcode) { - case BRW_OPCODE_DP4: - case BRW_OPCODE_DP3: - case BRW_OPCODE_DP2: - return true; default: - /* Check if there happens to be no reswizzling required. */ - for (int c = 0; c 4; c++) { - int bit = 1 BRW_GET_SWZ(swizzle, c); - /* Skip components of the swizzle not used by the dst. */ - if (!(dst_writemask (1 c))) -continue; + if (!brw_is_single_value_swizzle(swizzle)) { + /* Check if there happens to be no reswizzling required. */ + for (int c = 0; c 4; c++) { +int bit = 1 BRW_GET_SWZ(swizzle, c); +/* Skip components of the swizzle not used by the dst. */ +if (!(dst_writemask (1 c))) + continue; - /* We don't do the reswizzling yet, so just sanity check that we - * don't have to. - */ - if (bit != (1 c)) -return false; +/* We don't do the reswizzling yet, so just sanity check that we + * don't have to. + */ +if (bit != (1 c)) + return false; + } + return true; } + /* fallthrough */ + case BRW_OPCODE_DP4: + case BRW_OPCODE_DP3: + case BRW_OPCODE_DP2: return true; } } @@ -981,6 +984,21 @@ vec4_instruction::reswizzle_dst(int dst_writemask, int swizzle) int new_writemask = 0; switch (opcode) { + default: + if (!brw_is_single_value_swizzle(swizzle)) { + for (int c = 0; c 4; c++) { +/* Skip components of the swizzle not used by the dst. */ +if (!(dst_writemask (1 c))) + continue; + +/* We don't do the reswizzling yet, so just sanity check that we + * don't have to. 
+ */ +assert((1 BRW_GET_SWZ(swizzle, c)) == (1 c)); + } + break; + } + /* fallthrough */ case BRW_OPCODE_DP4: case BRW_OPCODE_DP3: case BRW_OPCODE_DP2: @@ -997,18 +1015,6 @@ vec4_instruction::reswizzle_dst(int dst_writemask, int swizzle) } dst.writemask = new_writemask; break; - default: - for (int c = 0; c 4; c++) { - /* Skip components of the swizzle not used by the dst. */ - if (!(dst_writemask (1 c))) -continue; - - /* We don't do the reswizzling yet, so just sanity check that we - * don't have to. - */ - assert((1 BRW_GET_SWZ(swizzle, c)) == (1 c)); - } - break; } } -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] i965/vec4: Add a pass to reduce swizzles.
total instructions in shared programs: 4344280 - 4288033 (-1.29%) instructions in affected programs: 397468 - 341221 (-14.15%) --- Suggestions for a better name are welcome. src/mesa/drivers/dri/i965/brw_vec4.cpp | 98 ++ src/mesa/drivers/dri/i965/brw_vec4.h | 1 + 2 files changed, 99 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 5d4a92c..c1363ca 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -311,6 +311,103 @@ src_reg::equals(const src_reg r) const sizeof(fixed_hw_reg)) == 0); } +/* Replaces unused channels of a swizzle with channels that are used. + * + * For instance, this pass transforms + * + *mov vgrf4.yz, vgrf5.wxzy + * + * into + * + *mov vgrf4.yz, vgrf5.xxzx + * + * This eliminates false uses of some channels, letting dead code elimination + * remove the instructions that wrote them. + */ +bool +vec4_visitor::opt_reduce_swizzle() +{ + bool progress = false; + + foreach_in_list_safe(vec4_instruction, inst, instructions) { + if (inst-dst.file == BAD_FILE || inst-dst.file == HW_REG) + continue; + + int swizzle[4]; + + /* Determine which channels of the sources are read. */ + switch (inst-opcode) { + case BRW_OPCODE_DP4: + case BRW_OPCODE_DPH: /* FINISHME: DPH reads only three channels of src0, +* but all four of src1. +*/ + swizzle[0] = 0; + swizzle[1] = 1; + swizzle[2] = 2; + swizzle[3] = 3; + break; + case BRW_OPCODE_DP3: + swizzle[0] = 0; + swizzle[1] = 1; + swizzle[2] = 2; + swizzle[3] = -1; + break; + case BRW_OPCODE_DP2: + swizzle[0] = 0; + swizzle[1] = 1; + swizzle[2] = -1; + swizzle[3] = -1; + break; + default: + swizzle[0] = inst-dst.writemask WRITEMASK_X ? 0 : -1; + swizzle[1] = inst-dst.writemask WRITEMASK_Y ? 1 : -1; + swizzle[2] = inst-dst.writemask WRITEMASK_Z ? 2 : -1; + swizzle[3] = inst-dst.writemask WRITEMASK_W ? 
3 : -1; + break; + } + + /* Resolve unread channels (-1) by assigning them the swizzle of the + * first channel that is used. + */ + int chosen = 0; + for (int i = 0; i 4; i++) { + if (swizzle[i] != -1) { +chosen = swizzle[i]; +break; + } + } + for (int i = 0; i 4; i++) { + if (swizzle[i] == -1) { +swizzle[i] = chosen; + } + } + + /* Update sources' swizzles. */ + for (int i = 0; i 3; i++) { + if (inst-src[i].file != GRF + inst-src[i].file != ATTR + inst-src[i].file != UNIFORM) +continue; + + int swiz[4]; + for (int j = 0; j 4; j++) { +swiz[j] = BRW_GET_SWZ(inst-src[i].swizzle, swizzle[j]); + } + + unsigned new_swizzle = BRW_SWIZZLE4(swiz[0], swiz[1], swiz[2], swiz[3]); + if (inst-src[i].swizzle != new_swizzle) { +inst-src[i].swizzle = new_swizzle; +progress = true; + } + } + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + static bool try_eliminate_instruction(vec4_instruction *inst, int new_writemask, const struct brw_context *brw) @@ -1701,6 +1798,7 @@ vec4_visitor::run() iteration++; int pass_num = 0; + OPT(opt_reduce_swizzle); OPT(dead_code_eliminate); OPT(dead_control_flow_eliminate, this); OPT(opt_copy_propagation); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index c59d24f..f009dd2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -380,6 +380,7 @@ public: void calculate_live_intervals(); void invalidate_live_intervals(); void split_virtual_grfs(); + bool opt_reduce_swizzle(); bool dead_code_eliminate(); bool virtual_grf_interferes(int a, int b); bool opt_copy_propagation(); -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 10/19] auxiliary/os: introduce os_get_total_physical_memory helper function
On , Emil Velikov wrote: Cc: Alexander von Gluck IV kallis...@unixzen.com Signed-off-by: Emil Velikov emil.l.veli...@gmail.com --- src/gallium/auxiliary/os/os_misc.c | 64 ++ src/gallium/auxiliary/os/os_misc.h | 7 + 2 files changed, 71 insertions(+) The Haiku portion of this patch looks good btw. I'll do a test build shortly. Thanks for cc'ing me :-) -- Alex ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] Fix surf-bankh init by default value when surf-tile_split == 0
Signed-off-by: Maks Naumov maksq...@ukr.net --- radeon/radeon_surface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/radeon/radeon_surface.c b/radeon/radeon_surface.c index e056ed4..40a544a 100644 --- a/radeon/radeon_surface.c +++ b/radeon/radeon_surface.c @@ -1311,7 +1311,7 @@ static int si_surface_sanity(struct radeon_surface_manager *surf_man, /* default value */ surf-mtilea = 1; surf-bankw = 1; -surf-bankw = 1; +surf-bankh = 1; surf-tile_split = 64; surf-stencil_tile_split = 64; } @@ -2138,7 +2138,7 @@ static int cik_surface_sanity(struct radeon_surface_manager *surf_man, /* default value */ surf-mtilea = 1; surf-bankw = 1; -surf-bankw = 1; +surf-bankh = 1; surf-tile_split = 64; surf-stencil_tile_split = 64; } -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa
On Mon, Aug 18, 2014 at 12:20 PM, Thomas Helland thomashellan...@gmail.com wrote: Hi Connor! I've been scrolling through your github-repo a bit the latest weeks, and I have to say, this seems quite promising. I've got some questions that I haven't really been able to answer myself with the quick glimpse I've had over the codebase: Since we're in large making a mathematical graph rewriting simplifier-thingy just as much as a compiler, does the IR as of now have an easy way of storing upper and lower bounds of variables? NIR as it stands doesn't have a way of storing upper and lower bounds of registers/SSA values (variables aren't used for computation in NIR), but it would be easy to do for an analysis pass - SSA values are indexed, so just put them in an array. Also, does it have an easy way to get something like the hierarchical visitor we have in GLSL IR? (A way of doing, say, algebraic optimizations the way we do now?) We don't have something like a hierarchical visitor in NIR, because it isn't necessary any more - certainly one could be created, though. Like I mentioned in the cover letter, we need one of two things to get the information we got with the expression trees (and actually even more): 1. Use-def chains (which definitions of this register can possibly reach this use?) and def-use chains (of all the uses of this register, which ones can be reached by this definition?) 2. SSA With SSA, use-def chains and def-use chains are trivial because each SSA value is defined only once: the use-def chain for each use is just the one definition, and the def-use chain for each definition is just the set of all uses, which we already keep track of. You can think of expression trees as a special case of SSA, where each definition has only one use. I think the plan for NIR is to just do all our optimizations in SSA, so we don't have to mess around with DU and UD chains at all. 
One of my pie-in-the-sky ideas is to make a language for doing graph-rewriting where we can say things like a * 1.0 = a similar to what LLVM has now, except it might get difficult with all the swizzles, modifiers, etc. that NIR supports. With these two in place, it would be easy to make a general bounds-checking optimization to eliminate max/min/sin/sign/cos/ etc operations. I believe that we, as of now, do not have such a pass. Well, I think Petri has made a pass like that for GLSL IR, you may want to check it out - right now it only handles mins and maxes but you should be able to extend it to other things as well. Here's a sketch of how a bounds analysis pass for NIR in SSA would probably work (just making this up now, I haven't worked out the details): - Create an array that for each SSA value gives its range, initializing it to (-infinity, infinity) for each value except for ones defined by load_const instructions, the output of sin and cos instructions, etc. - Create a worklist of SSA values, initially putting in it only the values that we didn't initialize to (-infinity, infinity). - While the worklist isn't empty: - Grab a value off the worklist - For each use of the value that is an ALU instruction: - Re-evaluate the bounds of the value the instruction defines - If the bound is now tighter and the value isn't already on the worklist, then put it in the worklist (note, this will probably work similarly for lots of other analysis passes, so it might be a good idea to abstract some of it out) Then, once you have the results of the analysis, you can do things like replacing all the uses of a max/min instruction with one of its inputs, etc. If this IR lands, I could probably find some time to port some of the optimization-passes from GLSL IR to NIR. That would be cool. Once this stuff gets actually implemented, there's probably going to be a lot of low-hanging fruit when it comes to optimizations, especially since writing optimizations in SSA is so easy!
Connor Regards, Thomas 2014-08-16 2:12 GMT+02:00 Connor Abbott cwabbo...@gmail.com: I know what you might be thinking right now. Wait, *another* IR? Don't we already have like 5 of those, not counting all the driver-specific ones? Isn't this stuff complicated enough already? Well, there are some pretty good reasons to start afresh (again...). In the years we've been using GLSL IR, we've come to realize that, in fact, it's not what we want *at all* to do optimizations on. Ian has done a talk at FOSDEM that highlights some of the problems they've run into: https://video.fosdem.org/2014/H1301_Cornil/Saturday/Three_Years_Experience_with_a_Treelike_Shader_IR.webm But here's the summary: * GLSL IR is way too much of a memory hog, since it has to make a new variable for each temporary the compiler creates and then each time you want to dereference that temporary you need to create an ir_dereference_variable that points to it which is also very cache-unfriendly (downright cache-mean!). * The expression
Re: [Mesa-dev] [RFC PATCH 00/16] A new IR for Mesa
On Mon, Aug 18, 2014 at 1:38 PM, Roland Scheidegger srol...@vmware.com wrote: Am 18.08.2014 19:05, schrieb Connor Abbott: On Mon, Aug 18, 2014 at 12:38 PM, Ilia Mirkin imir...@alum.mit.edu wrote: On Mon, Aug 18, 2014 at 12:25 PM, Connor Abbott cwabbo...@gmail.com wrote: On Mon, Aug 18, 2014 at 11:47 AM, Jose Fonseca jfons...@vmware.com wrote: On 18/08/14 14:21, Marek Olšák wrote: Once these are in place, all development effort to go on to improving/leveraging the new IR. We could deprecate TGSI when it would have few users. Also, switching to LLVM, NIR, or some other IR that uses SSA (or at least modifying TGSI to support it) seems like something that's really necessary for the Gallium folks. Soon, considering most backends already use SSA in one form or another, the situation will look like: GLSL IR - NIR - NIR with SSA - optimizations - NIR without SSA - TGSI - backend without SSA - backend with SSA So backends would have to duplicate the into-SSA logic and every shader would have to pay the penalty of being converted out of and then back into SSA thanks to TGSI not supporting it. Looking at it another way, perhaps we should just accept that backends will want to do their own things, and try to minimize the damage by doing GLSL IR - transport ir - backend Are you envisioning a world where every backend uses NIR, and uses some sort of shared register allocation/spilling/etc logic, configurable instruction lists, pluggable with lowering passes? By then you've invented LLVM... -ilia No, I expect that backends will still want to do their own register allocation/spilling/scheduling etc. - and besides for that, NIR supports structured control flow, swizzles and writemasks, modifiers (abs, negate, saturate), etc. natively in the IR instead of something that's tacked on or something that drivers have to do themselves. So no, I'm not re-inventing LLVM. 
On the other hand, it's entirely possible for backends to add their own backend-specific opcodes and intrinsics, and thereby be able to do some amount of lowering and optimization in NIR. Another reason that backends might want to accept NIR is so that they can give NIR passes more precise information on e.g. when to do if-conversion. Again, this is all speculative though - we'll have to do more of the work before we can find out how we want to use NIR beyond what I originally wrote it to be, which was a way to do common optimizations that we couldn't do in GLSL IR. Connor I guess having the typical gpu features (vec4 representation along with swizzles, writemasks, modifiers) in the IR is nice, though I'm beginning to wonder if it's all that useful. Obviously, it maps really well to old gpus (like r300) and old-style shaders using lots of vec4 (the human-readable assembly is going to be much nicer if you have vec4 support) but ultimately it seems most newer archs are scalar (or rather, their vectors are not along the instruction level axis). Something like Mali gpus being an exception rather than the norm. Even things like r600 need to do their own vliw-ication anyway. In any case, for gallium I'm pretty indifferent to shader IR actually, as long as things keep working... Just keep in mind though while tgsi might not be the optimal solution, there's significant precedent for this kind of low level shader language (since even d3d10 follows that model, though I can't tell how happy the IHVs are with it...). But if NIR benefits glsl compiler on its own that looks all good to me, it's just an area I'm not really familiar with. FWIW, we use AMDIL internally: http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/AMD_Intermediate_Language_(IL)_Specification_v2.pdf http://developer.amd.com/tools-and-sdks/opencl-zone/amd-accelerated-parallel-processing-app-sdk/documentation/amd-app-documentation/ Alex btw do you have some example of how a shader looks printed out?
I'm too lazy to play with it myself... Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Fix surf-bankh init by default value when surf-tile_split == 0
Reviewed-by: Marek Olšák marek.ol...@amd.com Do you have commit access? Marek On Mon, Aug 18, 2014 at 9:59 PM, Maks Naumov maksq...@ukr.net wrote: Signed-off-by: Maks Naumov maksq...@ukr.net --- radeon/radeon_surface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/radeon/radeon_surface.c b/radeon/radeon_surface.c index e056ed4..40a544a 100644 --- a/radeon/radeon_surface.c +++ b/radeon/radeon_surface.c @@ -1311,7 +1311,7 @@ static int si_surface_sanity(struct radeon_surface_manager *surf_man, /* default value */ surf-mtilea = 1; surf-bankw = 1; -surf-bankw = 1; +surf-bankh = 1; surf-tile_split = 64; surf-stencil_tile_split = 64; } @@ -2138,7 +2138,7 @@ static int cik_surface_sanity(struct radeon_surface_manager *surf_man, /* default value */ surf-mtilea = 1; surf-bankw = 1; -surf-bankw = 1; +surf-bankh = 1; surf-tile_split = 64; surf-stencil_tile_split = 64; } -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Fix surf-bankh init by default value when surf-tile_split == 0
I don't have access to git. Reviewed-by: Marek Olšák marek.ol...@amd.com Do you have commit access? Marek On Mon, Aug 18, 2014 at 9:59 PM, Maks Naumov maksq...@ukr.net wrote: Signed-off-by: Maks Naumov maksq...@ukr.net --- radeon/radeon_surface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/radeon/radeon_surface.c b/radeon/radeon_surface.c index e056ed4..40a544a 100644 --- a/radeon/radeon_surface.c +++ b/radeon/radeon_surface.c @@ -1311,7 +1311,7 @@ static int si_surface_sanity(struct radeon_surface_manager *surf_man, /* default value */ surf-mtilea = 1; surf-bankw = 1; - surf-bankw = 1; + surf-bankh = 1; surf-tile_split = 64; surf-stencil_tile_split = 64; } @@ -2138,7 +2138,7 @@ static int cik_surface_sanity(struct radeon_surface_manager *surf_man, /* default value */ surf-mtilea = 1; surf-bankw = 1; - surf-bankw = 1; + surf-bankh = 1; surf-tile_split = 64; surf-stencil_tile_split = 64; } -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/6] rbug: fix a crash in sampler_view_destroy caused by incorrect context
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/rbug/rbug_objects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/rbug/rbug_objects.c b/src/gallium/drivers/rbug/rbug_objects.c index c64b14c..2d80164 100644 --- a/src/gallium/drivers/rbug/rbug_objects.c +++ b/src/gallium/drivers/rbug/rbug_objects.c @@ -137,7 +137,7 @@ rbug_sampler_view_create(struct rbug_context *rb_context, rb_view-base.reference.count = 1; rb_view-base.texture = NULL; pipe_resource_reference(rb_view-base.texture, rb_resource-base); - rb_view-base.context = rb_context-pipe; + rb_view-base.context = rb_context-base; rb_view-sampler_view = view; return rb_view-base; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] rbug: only add textures to the list
From: Marek Olšák marek.ol...@amd.com rbug-gui cannot display buffers, so it's pointless to add them. --- src/gallium/drivers/rbug/rbug_objects.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/rbug/rbug_objects.c b/src/gallium/drivers/rbug/rbug_objects.c index 2d80164..db18f2e 100644 --- a/src/gallium/drivers/rbug/rbug_objects.c +++ b/src/gallium/drivers/rbug/rbug_objects.c @@ -58,7 +58,8 @@ rbug_resource_create(struct rbug_screen *rb_screen, rb_resource-base.screen = rb_screen-base; rb_resource-resource = resource; - rbug_screen_add_to_list(rb_screen, resources, rb_resource); + if (resource-target != PIPE_BUFFER) + rbug_screen_add_to_list(rb_screen, resources, rb_resource); return rb_resource-base; @@ -71,7 +72,9 @@ void rbug_resource_destroy(struct rbug_resource *rb_resource) { struct rbug_screen *rb_screen = rbug_screen(rb_resource-base.screen); - rbug_screen_remove_from_list(rb_screen, resources, rb_resource); + + if (rb_resource-base.target != PIPE_BUFFER) + rbug_screen_remove_from_list(rb_screen, resources, rb_resource); pipe_resource_reference(rb_resource-resource, NULL); FREE(rb_resource); -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/6] rbug: remove contexts from the list properly
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/rbug/rbug_context.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index 62fe543..ca94590 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -40,10 +40,12 @@ static void rbug_destroy(struct pipe_context *_pipe) { + struct rbug_screen *rb_screen = rbug_screen(_pipe-screen); struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe-pipe; - remove_from_list(rb_pipe-list); + rbug_screen_remove_from_list(rb_screen, contexts, rb_pipe); + pipe_mutex_lock(rb_pipe-call_mutex); pipe-destroy(pipe); rb_pipe-pipe = NULL; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/6] rbug: fix crash in set_vertex_buffers
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/rbug/rbug_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index ca94590..d6fca2e 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -758,7 +758,7 @@ rbug_set_vertex_buffers(struct pipe_context *_pipe, pipe_mutex_lock(rb_pipe-call_mutex); - if (num_buffers) { + if (num_buffers _buffers) { memcpy(unwrapped_buffers, _buffers, num_buffers * sizeof(*_buffers)); for (i = 0; i num_buffers; i++) unwrapped_buffers[i].buffer = rbug_resource_unwrap(_buffers[i].buffer); -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/6] rbug: send the actual number of layers to the client
From: Marek Olšák marek.ol...@amd.com This sends the correct value for array textures. --- src/gallium/drivers/rbug/rbug_core.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/rbug/rbug_core.c b/src/gallium/drivers/rbug/rbug_core.c index c5b26b8..ece5e2f 100644 --- a/src/gallium/drivers/rbug/rbug_core.c +++ b/src/gallium/drivers/rbug/rbug_core.c @@ -204,6 +204,7 @@ rbug_texture_info(struct rbug_rbug *tr_rbug, struct rbug_header *header, uint32_ struct rbug_proto_texture_info *gpti = (struct rbug_proto_texture_info *)header; struct rbug_list *ptr; struct pipe_resource *t; + unsigned num_layers; pipe_mutex_lock(rb_screen-list_mutex); foreach(ptr, rb_screen-resources) { @@ -219,11 +220,13 @@ rbug_texture_info(struct rbug_rbug *tr_rbug, struct rbug_header *header, uint32_ } t = tr_tex-resource; + num_layers = util_max_layer(t, 0) + 1; + rbug_send_texture_info_reply(tr_rbug-con, serial, t-target, t-format, t-width0, 1, t-height0, 1, - t-depth0, 1, + num_layers, 1, util_format_get_blockwidth(t-format), util_format_get_blockheight(t-format), util_format_get_blocksize(t-format), -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/6] rbug: implement streamout context functions
From: Marek Olšák marek.ol...@amd.com --- src/gallium/drivers/rbug/rbug_context.c | 46 + 1 file changed, 46 insertions(+) diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index d6fca2e..71bc216 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -803,6 +803,49 @@ rbug_set_sample_mask(struct pipe_context *_pipe, pipe_mutex_unlock(rb_pipe-call_mutex); } +static struct pipe_stream_output_target * +rbug_create_stream_output_target(struct pipe_context *_pipe, + struct pipe_resource *_res, + unsigned buffer_offset, unsigned buffer_size) +{ + struct rbug_context *rb_pipe = rbug_context(_pipe); + struct pipe_context *pipe = rb_pipe-pipe; + struct pipe_resource *res = rbug_resource_unwrap(_res); + struct pipe_stream_output_target *target; + + pipe_mutex_lock(rb_pipe-call_mutex); + target = pipe-create_stream_output_target(pipe, res, buffer_offset, + buffer_size); + pipe_mutex_unlock(rb_pipe-call_mutex); + return target; +} + +static void +rbug_stream_output_target_destroy(struct pipe_context *_pipe, + struct pipe_stream_output_target *target) +{ + struct rbug_context *rb_pipe = rbug_context(_pipe); + struct pipe_context *pipe = rb_pipe-pipe; + + pipe_mutex_lock(rb_pipe-call_mutex); + pipe-stream_output_target_destroy(pipe, target); + pipe_mutex_unlock(rb_pipe-call_mutex); +} + +static void +rbug_set_stream_output_targets(struct pipe_context *_pipe, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct rbug_context *rb_pipe = rbug_context(_pipe); + struct pipe_context *pipe = rb_pipe-pipe; + + pipe_mutex_lock(rb_pipe-call_mutex); + pipe-set_stream_output_targets(pipe, num_targets, targets, offsets); + pipe_mutex_unlock(rb_pipe-call_mutex); +} + static void rbug_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *_dst, @@ -1174,6 +1217,9 @@ rbug_context_create(struct pipe_screen *_screen, struct pipe_context 
*pipe) rb_pipe-base.set_vertex_buffers = rbug_set_vertex_buffers; rb_pipe-base.set_index_buffer = rbug_set_index_buffer; rb_pipe-base.set_sample_mask = rbug_set_sample_mask; + rb_pipe-base.create_stream_output_target = rbug_create_stream_output_target; + rb_pipe-base.stream_output_target_destroy = rbug_stream_output_target_destroy; + rb_pipe-base.set_stream_output_targets = rbug_set_stream_output_targets; rb_pipe-base.resource_copy_region = rbug_resource_copy_region; rb_pipe-base.blit = rbug_blit; rb_pipe-base.flush_resource = rbug_flush_resource; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 7/7] i965: Enable instruction compaction on Gen8+.
s/Gen8+/Gen8/ in the commit message, since it's still disabled for future gens. On Tue, Aug 19, 2014 at 6:19 AM, Matt Turner matts...@gmail.com wrote: --- src/mesa/drivers/dri/i965/brw_eu_compact.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 727fef5..280d7f7 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -1101,7 +1101,7 @@ brw_compact_instructions(struct brw_compile *p, int start_offset, */ int old_ip[(p->next_insn_offset - start_offset) / 8]; - if (brw->gen < 6 || brw->gen >= 8) + if (brw->gen < 6 || brw->gen > 8) return; int src_offset; -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/7] i965: Update JIP/UIP compaction code to operate on bytes.
On Monday, August 18, 2014 11:19:48 AM Matt Turner wrote: JIP/UIP were previously in units of compacted instructions. On Gen8 they're in units of bytes. --- src/mesa/drivers/dri/i965/brw_eu_compact.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 25a96e7..f100297 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -653,17 +653,19 @@ static void update_uip_jip(struct brw_context *brw, brw_inst *insn, int this_old_ip, int *compacted_counts) { - int jip = brw_inst_jip(brw, insn); + int scale = brw->gen >= 8 ? sizeof(brw_compact_inst) : 1; + + int32_t jip = brw_inst_jip(brw, insn) / scale; jip -= compacted_between(this_old_ip, this_old_ip + jip, compacted_counts); - brw_inst_set_jip(brw, insn, jip); + brw_inst_set_jip(brw, insn, jip * scale); if (brw_inst_opcode(brw, insn) == BRW_OPCODE_ENDIF || brw_inst_opcode(brw, insn) == BRW_OPCODE_WHILE) return; - int uip = brw_inst_uip(brw, insn); + int32_t uip = brw_inst_uip(brw, insn) / scale; uip -= compacted_between(this_old_ip, this_old_ip + uip, compacted_counts); - brw_inst_set_uip(brw, insn, uip); + brw_inst_set_uip(brw, insn, uip * scale); } void This originally confused me a bit, but I believe it's correct. Here, your local variable jip is the jump distance in units of number of compact instructions. So, for Broadwell, you convert from bytes to that, subtract some number of compact instructions, and scale back up. You could instead do: int32_t jip = brw_inst_jip(brw, insn); jip -= scale * compacted_between(this_old_ip, this_old_ip + jip, compacted_counts); brw_inst_set_jip(brw, insn, jip); which is a bit less frobbing around and unit conversions. You could also do: int scale = brw_jump_scale(brw) / 2; if you wanted. I'll leave it up to you which style you prefer.
Reviewed-by: Kenneth Graunke kenn...@whitecape.org signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 12/12] i965: Implement fast color clears using meta operations
Ken, It would be nice to get that patch [1] in fairly soon -- master is currently unusable. [1] http://cgit.freedesktop.org/~kwg/mesa/commit/?h=texturelock&id=51b6879849f1efcfb28a45a63c2230ad0b2292e7 Consider it: Reviewed-and-tested-by: Chris Forbes chr...@ijw.co.nz On Mon, Aug 18, 2014 at 9:36 AM, Chris Forbes chr...@ijw.co.nz wrote: Yes, that fixes it. On Mon, Aug 18, 2014 at 9:01 AM, Kristian Høgsberg hoegsb...@gmail.com wrote: On Sun, Aug 17, 2014 at 11:36:55PM +1200, Chris Forbes wrote: This commit (2f28a0dc2 on master) causes various apps (at least glxgears & vlc) to render garbage on my HSW GT3e. There are regular vertical bands of black pixels; on some frames, a few blocks of pixels within those bands are present; on others, not. Is that fixed by http://cgit.freedesktop.org/~kwg/mesa/commit/?h=texturelock&id=51b6879849f1efcfb28a45a63c2230ad0b2292e7 ? The docs say "When performing a render target resolve, PIPE_CONTROL with end of pipe sync must be delivered.", which doesn't make it clear whether it's before or after. A RC flush before doing the resolve certainly makes sense, since you'd expect the resolve operation to have to read back from the MCS.
Kristian On Tue, Aug 12, 2014 at 5:45 PM, Kristian Høgsberg hoegsb...@gmail.com wrote: On Mon, Aug 11, 2014 at 08:46:23PM -0400, Ilia Mirkin wrote: On Mon, Aug 11, 2014 at 8:29 PM, Kristian Høgsberg k...@bitplanet.net wrote: diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c b/src/mesa/drivers/dri/i965/intel_tex_copy.c index 97f1569..2456080 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_copy.c +++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c @@ -79,6 +79,8 @@ intel_copy_texsubimage(struct brw_context *brw, int dst_slice = slice + intelImage->base.Base.Face + intelImage->base.Base.TexObject->MinLayer; + _mesa_unlock_texture(&brw->ctx, intelImage->base.Base.TexObject); + /* blit from src buffer to texture */ if (!intel_miptree_blit(brw, irb->mt, irb->mt_level, irb->mt_layer, @@ -89,6 +91,8 @@ intel_copy_texsubimage(struct brw_context *brw, return false; } + _mesa_lock_texture(&brw->ctx, intelImage->base.Base.TexObject); + return true; } Just happened to notice in some aimless scrolling... you return above, so in one path you don't relock the texture, but in the other you do. Usually that sort of thing leads to trouble, but perhaps there's something going on here which makes it OK. If that's the case, probably deserves a comment. Nope, my bad. Thanks for catching that. Kristian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 7/7] i965: Enable instruction compaction on Gen8+.
On Monday, August 18, 2014 11:19:53 AM Matt Turner wrote: --- src/mesa/drivers/dri/i965/brw_eu_compact.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 727fef5..280d7f7 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -1101,7 +1101,7 @@ brw_compact_instructions(struct brw_compile *p, int start_offset, */ int old_ip[(p->next_insn_offset - start_offset) / 8]; - if (brw->gen < 6 || brw->gen >= 8) + if (brw->gen < 6 || brw->gen > 8) return; int src_offset; I see no reason why this wouldn't work on Skylake, so I'd just go ahead and do: if (brw->gen < 6) return; With that change, this is: Reviewed-by: Kenneth Graunke kenn...@whitecape.org signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/7] i965: Reverse condition ordering to let us support other gens.
On Monday, August 18, 2014 11:19:47 AM Matt Turner wrote: --- src/mesa/drivers/dri/i965/brw_eu_compact.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 625cfbb..25a96e7 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -789,14 +789,14 @@ brw_compact_instructions(struct brw_compile *p, int start_offset, case BRW_OPCODE_ELSE: case BRW_OPCODE_ENDIF: case BRW_OPCODE_WHILE: - if (brw->gen == 6) { + if (brw->gen >= 7) { +update_uip_jip(brw, insn, this_old_ip, compacted_counts); + } else if (brw->gen == 6) { int gen6_jump_count = brw_inst_gen6_jump_count(brw, insn); target_old_ip = this_old_ip + gen6_jump_count; target_compacted_count = compacted_counts[target_old_ip]; gen6_jump_count -= (target_compacted_count - this_compacted_count); brw_inst_set_gen6_jump_count(brw, insn, gen6_jump_count); - } else { -update_uip_jip(brw, insn, this_old_ip, compacted_counts); } break; } This isn't necessary - gen >= 8 would've failed the gen == 6 check and hit the else case. You've just swapped them around for readability...which I like. Reviewed-by: Kenneth Graunke kenn...@whitecape.org signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/6] radeonsi: use r600_draw_rectangle from r600g
From: Marek Olšák marek.ol...@amd.com Rectangles are easier than triangles for the rasterizer. --- src/gallium/drivers/r600/r600_blit.c | 1 - src/gallium/drivers/r600/r600_pipe.c | 1 - src/gallium/drivers/r600/r600_pipe.h | 4 -- src/gallium/drivers/r600/r600_state_common.c | 64 --- src/gallium/drivers/radeon/r600_pipe_common.c | 64 +++ src/gallium/drivers/radeon/r600_pipe_common.h | 8 src/gallium/drivers/radeonsi/si_blit.c| 1 - src/gallium/drivers/radeonsi/si_pipe.c| 2 +- src/gallium/drivers/radeonsi/si_state_draw.c | 7 +-- 9 files changed, 77 insertions(+), 75 deletions(-) diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index c98206f..a3cfdae 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -22,7 +22,6 @@ */ #include r600_pipe.h #include util/u_surface.h -#include util/u_blitter.h #include util/u_format.h #include evergreend.h diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 4543347..226ad6e 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -30,7 +30,6 @@ #include errno.h #include pipe/p_shader_tokens.h -#include util/u_blitter.h #include util/u_debug.h #include util/u_memory.h #include util/u_simple_shaders.h diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index d04fef8..ee836b7 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -32,7 +32,6 @@ #include r600_llvm.h #include r600_public.h -#include util/u_blitter.h #include util/u_suballoc.h #include util/u_double_list.h #include util/u_transfer.h @@ -633,9 +632,6 @@ void r600_sampler_views_dirty(struct r600_context *rctx, void r600_sampler_states_dirty(struct r600_context *rctx, struct r600_sampler_states *state); void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state); -void r600_draw_rectangle(struct 
blitter_context *blitter, -int x1, int y1, int x2, int y2, float depth, -enum blitter_attrib_type type, const union pipe_color_union *attrib); uint32_t r600_translate_stencil_op(int s_op); uint32_t r600_translate_fill(uint32_t func); unsigned r600_tex_wrap(unsigned wrap); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index d29e137..d2f0d17 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -28,7 +28,6 @@ #include r600_shader.h #include r600d.h -#include util/u_draw_quad.h #include util/u_format_s3tc.h #include util/u_index_modify.h #include util/u_memory.h @@ -36,8 +35,6 @@ #include util/u_math.h #include tgsi/tgsi_parse.h -#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX - void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw) { assert(!cb-buf); @@ -1550,67 +1547,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info rctx-b.num_draw_calls++; } -void r600_draw_rectangle(struct blitter_context *blitter, -int x1, int y1, int x2, int y2, float depth, -enum blitter_attrib_type type, const union pipe_color_union *attrib) -{ - struct r600_context *rctx = (struct r600_context*)util_blitter_get_pipe(blitter); - struct pipe_viewport_state viewport; - struct pipe_resource *buf = NULL; - unsigned offset = 0; - float *vb; - - if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) { - util_blitter_draw_rectangle(blitter, x1, y1, x2, y2, depth, type, attrib); - return; - } - - /* Some operations (like color resolve on r6xx) don't work -* with the conventional primitive types. -* One that works is PT_RECTLIST, which we use here. 
*/ - - /* setup viewport */ - viewport.scale[0] = 1.0f; - viewport.scale[1] = 1.0f; - viewport.scale[2] = 1.0f; - viewport.scale[3] = 1.0f; - viewport.translate[0] = 0.0f; - viewport.translate[1] = 0.0f; - viewport.translate[2] = 0.0f; - viewport.translate[3] = 0.0f; - rctx-b.b.set_viewport_states(rctx-b.b, 0, 1, viewport); - - /* Upload vertices. The hw rectangle has only 3 vertices, -* I guess the 4th one is derived from the first 3. -* The vertex specification should match u_blitter's vertex element state. */ - u_upload_alloc(rctx-b.uploader, 0, sizeof(float) * 24, offset, buf, (void**)vb); - vb[0] = x1; - vb[1] = y1; - vb[2] = depth; - vb[3] = 1; - - vb[8] = x1; - vb[9] = y2; - vb[10] = depth; - vb[11] = 1; - - vb[16] = x2; - vb[17] = y1; -
[Mesa-dev] [PATCH 2/6] gallium/u_blitter: don't use an empty fragment shader if there's a colorbuffer
From: Marek Olšák marek.ol...@amd.com This is custom code used by some drivers. --- src/gallium/auxiliary/util/u_blitter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 20fbd80..609e02f 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -1799,7 +1799,7 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter, pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA] : ctx->blend[0]); pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage); - ctx->bind_fs_state(pipe, ctx->fs_empty); + ctx->bind_fs_state(pipe, cbsurf ? ctx->fs_write_one_cbuf : ctx->fs_empty); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); /* set a framebuffer state */ -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/6] radeonsi: save scissor state and sample mask for u_blitter
From: Marek Olšák marek.ol...@amd.com Cc: mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/radeonsi/si_blit.c | 7 +++ src/gallium/drivers/radeonsi/si_state.c | 16 ++-- src/gallium/drivers/radeonsi/si_state.h | 14 -- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index bc31dfd..9a7a2fe 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -59,9 +59,16 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) util_blitter_save_geometry_shader(sctx-blitter, sctx-gs_shader); util_blitter_save_vertex_shader(sctx-blitter, sctx-vs_shader); util_blitter_save_vertex_elements(sctx-blitter, sctx-vertex_elements); + if (sctx-queued.named.sample_mask) { + util_blitter_save_sample_mask(sctx-blitter, + sctx-queued.named.sample_mask-sample_mask); + } if (sctx-queued.named.viewport) { util_blitter_save_viewport(sctx-blitter, sctx-queued.named.viewport-viewport); } + if (sctx-queued.named.scissor) { + util_blitter_save_scissor(sctx-blitter, sctx-queued.named.scissor-scissor); + } util_blitter_save_vertex_buffer_slot(sctx-blitter, sctx-vertex_buffer); util_blitter_save_so_targets(sctx-blitter, sctx-b.streamout.num_targets, (struct pipe_stream_output_target**)sctx-b.streamout.targets); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 98c19d6..fc928f3 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -458,18 +458,20 @@ static void si_set_scissor_states(struct pipe_context *ctx, const struct pipe_scissor_state *state) { struct si_context *sctx = (struct si_context *)ctx; - struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); + struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor); + struct si_pm4_state *pm4 = scissor-pm4; - if (pm4 == NULL) + if (scissor == NULL) return; + scissor-scissor = *state; 
si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL, S_028250_TL_X(state-minx) | S_028250_TL_Y(state-miny) | S_028250_WINDOW_OFFSET_DISABLE(1)); si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR, S_028254_BR_X(state-maxx) | S_028254_BR_Y(state-maxy)); - si_pm4_set_state(sctx, scissor, pm4); + si_pm4_set_state(sctx, scissor, scissor); } static void si_set_viewport_states(struct pipe_context *ctx, @@ -2774,16 +2776,18 @@ static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) { struct si_context *sctx = (struct si_context *)ctx; - struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); + struct si_state_sample_mask *state = CALLOC_STRUCT(si_state_sample_mask); + struct si_pm4_state *pm4 = state-pm4; uint16_t mask = sample_mask; -if (pm4 == NULL) +if (state == NULL) return; + state-sample_mask = mask; si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask 16)); si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask 16)); - si_pm4_set_state(sctx, sample_mask, pm4); + si_pm4_set_state(sctx, sample_mask, state); } static void si_delete_sampler_state(struct pipe_context *ctx, void *state) diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 82bea79..ce18a27 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -38,6 +38,16 @@ struct si_state_blend { boolalpha_to_one; }; +struct si_state_sample_mask { + struct si_pm4_state pm4; + uint16_tsample_mask; +}; + +struct si_state_scissor { + struct si_pm4_state pm4; + struct pipe_scissor_state scissor; +}; + struct si_state_viewport { struct si_pm4_state pm4; struct pipe_viewport_state viewport; @@ -82,8 +92,8 @@ union si_state { struct si_state_blend *blend; struct si_pm4_state *blend_color; struct si_pm4_state *clip; - struct si_pm4_state *sample_mask; - struct si_pm4_state *scissor; + struct 
si_state_sample_mask *sample_mask; + struct si_state_scissor *scissor; struct si_state_viewport*viewport;
[Mesa-dev] [PATCH 3/6] radeonsi: don't set CB_SHADER_MASK=1 if there are no color outputs
From: Marek Olšák marek.ol...@amd.com This hack isn't needed anymore because of the previous u_blitter commit. --- src/gallium/drivers/radeonsi/si_shader.c | 4 1 file changed, 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 3fcd314..08ba8b0 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1514,10 +1514,6 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) last_args[6]= uint->zero; last_args[7]= uint->zero; last_args[8]= uint->zero; - - si_shader_ctx->shader->spi_shader_col_format |= - V_028714_SPI_SHADER_32_ABGR; - si_shader_ctx->shader->cb_shader_mask |= S_02823C_OUTPUT0_ENABLE(0xf); } /* Specify whether the EXEC mask represents the valid mask */ -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] radeonsi: simplify si_num_banks function
From: Marek Olšák marek.ol...@amd.com This makes it easier to use. --- src/gallium/drivers/radeonsi/si_dma.c | 6 ++ src/gallium/drivers/radeonsi/si_state.c | 19 ++- src/gallium/drivers/radeonsi/si_state.h | 3 +-- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index e908746..a69f533 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -173,8 +173,7 @@ static void si_dma_copy_tile(struct si_context *ctx, tile_split = cik_tile_split(rsrc-surface.tile_split); tile_mode_index = si_tile_mode_index(rsrc, src_level, util_format_has_stencil(util_format_description(src-format))); - nbanks = si_num_banks(sscreen, rsrc-surface.bpe, rsrc-surface.tile_split, - tile_mode_index); + nbanks = si_num_banks(sscreen, rsrc); base += rsrc-resource.gpu_address; addr += rdst-resource.gpu_address; } else { @@ -202,8 +201,7 @@ static void si_dma_copy_tile(struct si_context *ctx, tile_split = cik_tile_split(rdst-surface.tile_split); tile_mode_index = si_tile_mode_index(rdst, dst_level, util_format_has_stencil(util_format_description(dst-format))); - nbanks = si_num_banks(sscreen, rdst-surface.bpe, rdst-surface.tile_split, - tile_mode_index); + nbanks = si_num_banks(sscreen, rdst); base += rdst-resource.gpu_address; addr += rsrc-resource.gpu_address; } diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index fc928f3..4ab2b8b 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -47,15 +47,14 @@ static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem, *list_elem = atom; } -uint32_t si_num_banks(struct si_screen *sscreen, unsigned bpe, unsigned tile_split, - unsigned tile_mode_index) +uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex) { - if ((sscreen-b.chip_class == CIK) + if (sscreen-b.chip_class == CIK 
sscreen-b.info.cik_macrotile_mode_array_valid) { unsigned index, tileb; - tileb = 8 * 8 * bpe; - tileb = MIN2(tile_split, tileb); + tileb = 8 * 8 * tex-surface.bpe; + tileb = MIN2(tex-surface.tile_split, tileb); for (index = 0; tileb 64; index++) { tileb = 1; @@ -65,11 +64,14 @@ uint32_t si_num_banks(struct si_screen *sscreen, unsigned bpe, unsigned tile_spl return (sscreen-b.info.cik_macrotile_mode_array[index] 6) 0x3; } - if ((sscreen-b.chip_class == SI) + if (sscreen-b.chip_class == SI sscreen-b.info.si_tile_mode_array_valid) { + /* Don't use stencil_tiling_index, because num_banks is always +* read from the depth mode. */ + unsigned tile_mode_index = tex-surface.tiling_index[0]; assert(tile_mode_index 32); - return (sscreen-b.info.si_tile_mode_array[tile_mode_index] 20) 0x3; + return G_009910_NUM_BANKS(sscreen-b.info.si_tile_mode_array[tile_mode_index]); } /* The old way. */ @@ -1820,8 +1822,7 @@ static void si_init_depth_surface(struct si_context *sctx, macro_aspect = cik_macro_tile_aspect(macro_aspect); bankw = cik_bank_wh(bankw); bankh = cik_bank_wh(bankh); - nbanks = si_num_banks(sscreen, rtex-surface.bpe, rtex-surface.tile_split, - ~0); + nbanks = si_num_banks(sscreen, rtex); tile_mode_index = si_tile_mode_index(rtex, level, false); pipe_config = cik_db_pipe_config(sscreen, tile_mode_index); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index ce18a27..7362ad1 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -263,8 +263,7 @@ unsigned cik_bank_wh(unsigned bankwh); unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode); unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect); unsigned cik_tile_split(unsigned tile_split); -uint32_t si_num_banks(struct si_screen *sscreen, unsigned bpe, unsigned tile_split, - unsigned tile_mode_index); +uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex); unsigned 
si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil); /* si_state_draw.c */
[Mesa-dev] [PATCH 1/6] gallium/util: handle PIPE_BUFFER in util_pipe_tex_to_tgsi_tex
From: Marek Olšák marek.ol...@amd.com --- src/gallium/auxiliary/util/u_inlines.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index e952615..c80ec48 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -565,6 +565,9 @@ util_pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target, unsigned nr_samples) { switch (pipe_tex_target) { + case PIPE_BUFFER: + return TGSI_TEXTURE_BUFFER; + case PIPE_TEXTURE_1D: assert(nr_samples <= 1); return TGSI_TEXTURE_1D; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/4] radeonsi: set IA_MULTI_VGT_PARAM on SI the same as on CIK (v2)
From: Marek Olšák marek.ol...@amd.com Nothing's changed for CIK here. --- src/gallium/drivers/radeonsi/si_state.c | 6 -- src/gallium/drivers/radeonsi/si_state_draw.c | 90 +++- 2 files changed, 50 insertions(+), 46 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 3d1e02a..0c6f62a 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3110,12 +3110,6 @@ void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0); si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); - if (sctx-b.chip_class == SI) { - si_pm4_set_reg(pm4, R_028AA8_IA_MULTI_VGT_PARAM, - S_028AA8_SWITCH_ON_EOP(1) | - S_028AA8_PARTIAL_VS_WAVE_ON(1) | - S_028AA8_PRIMGROUP_SIZE(63)); - } si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x); si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); if (sctx-b.chip_class CIK) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 573487c..2e999f6 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -379,6 +379,53 @@ static unsigned si_conv_prim_to_gs_out(unsigned mode) return prim_conv[mode]; } +static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, + const struct pipe_draw_info *info) +{ + struct si_state_rasterizer *rs = sctx-queued.named.rasterizer; + unsigned prim = info-mode; + unsigned primgroup_size = 64; + + /* SWITCH_ON_EOP(0) is always preferable. */ + bool wd_switch_on_eop = false; + bool ia_switch_on_eop = false; + + /* This is a hardware requirement. */ + if ((rs rs-line_stipple_enable) || + (sctx-b.screen-debug_flags DBG_SWITCH_ON_EOP)) { + ia_switch_on_eop = true; + wd_switch_on_eop = true; + } + + if (sctx-b.chip_class = CIK) { + /* WD_SWITCH_ON_EOP has no effect on GPUs with less than +* 4 shader engines. Set 1 to pass the assertion below. 
+* The other cases are hardware requirements. */ + if (sctx-b.screen-info.max_se 4 || + prim == PIPE_PRIM_POLYGON || + prim == PIPE_PRIM_LINE_LOOP || + prim == PIPE_PRIM_TRIANGLE_FAN || + prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY || + info-primitive_restart) + wd_switch_on_eop = true; + + /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0. +* We don't know that for indirect drawing, so treat it as +* always problematic. */ + if (sctx-b.family == CHIP_HAWAII + (info-indirect || info-instance_count 1)) + wd_switch_on_eop = true; + + /* If the WD switch is false, the IA switch must be false too. */ + assert(wd_switch_on_eop || !ia_switch_on_eop); + } + + return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | + S_028AA8_PARTIAL_VS_WAVE_ON(1) | + S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) | + S_028AA8_WD_SWITCH_ON_EOP(sctx-b.chip_class = CIK ? wd_switch_on_eop : 0); +} + static bool si_update_draw_info_state(struct si_context *sctx, const struct pipe_draw_info *info, const struct pipe_index_buffer *ib) @@ -391,6 +438,7 @@ static bool si_update_draw_info_state(struct si_context *sctx, sctx-gs_shader-current-shader.gs_output_prim : info-mode); unsigned ls_mask = 0; + unsigned ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info); if (pm4 == NULL) return false; @@ -401,55 +449,17 @@ static bool si_update_draw_info_state(struct si_context *sctx, } if (sctx-b.chip_class = CIK) { - struct si_state_rasterizer *rs = sctx-queued.named.rasterizer; - unsigned primgroup_size = 64; - - /* SWITCH_ON_EOP(0) is always preferable. */ - bool wd_switch_on_eop = false; - bool ia_switch_on_eop = false; - - /* WD_SWITCH_ON_EOP has no effect on GPUs with less than -* 4 shader engines. Set 1 to pass the assertion below. -* The other cases are hardware requirements. */ - if (sctx-b.screen-info.max_se 4 || - prim == V_008958_DI_PT_POLYGON || - prim == V_008958_DI_PT_LINELOOP || - prim ==
[Mesa-dev] [PATCH 3/4] radeonsi: bump PRIMGROUP_SIZE for some cases
From: Marek Olšák marek.ol...@amd.com Recommended by hw people. --- src/gallium/drivers/radeonsi/si_state_draw.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index f5d6550..0f700a8 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -384,13 +384,16 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, { struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; unsigned prim = info->mode; - unsigned primgroup_size = 64; + unsigned primgroup_size = 128; /* recommended without a GS */ /* SWITCH_ON_EOP(0) is always preferable. */ bool wd_switch_on_eop = false; bool ia_switch_on_eop = false; bool partial_vs_wave = false; + if (sctx->gs_shader) + primgroup_size = 64; /* recommended with a GS */ + /* This is a hardware requirement. */ if ((rs && rs->line_stipple_enable) || (sctx->b.screen->debug_flags & DBG_SWITCH_ON_EOP)) { -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/4] r600g: copy IA_MULTI_VGT_PARAM programming from radeonsi for Cayman
From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/r600/evergreen_state.c | 2 -- src/gallium/drivers/r600/r600_pipe.h | 2 +- src/gallium/drivers/r600/r600_state_common.c | 24 ++++++++++++++++++++++++ src/gallium/drivers/r600/r600d.h | 11 +++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index e6e9f49..841ad0c 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2243,8 +2243,6 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx) r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1); - r600_store_context_reg(cb, CM_R_028AA8_IA_MULTI_VGT_PARAM, S_028AA8_SWITCH_ON_EOP(1) | S_028AA8_PARTIAL_VS_WAVE_ON(1) | S_028AA8_PRIMGROUP_SIZE(63)); - r600_store_context_reg_seq(cb, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); r600_store_value(cb, 0x76543210); /* CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0 */ r600_store_value(cb, 0xfedcba98); /* CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1 */ diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index ee836b7..e277269 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -40,7 +40,7 @@ /* the number of CS dwords for flushing and drawing */ #define R600_MAX_FLUSH_CS_DWORDS 16 -#define R600_MAX_DRAW_CS_DWORDS 37 +#define R600_MAX_DRAW_CS_DWORDS 40 #define R600_TRACE_CS_DWORDS 7 #define R600_MAX_USER_CONST_BUFFERS 13 diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index d2f0d17..7594d0e 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1418,6 +1418,30 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info r600_emit_atom(rctx, rctx->atoms[i]); } + if (rctx->b.chip_class == CAYMAN) { + /* Copied from radeonsi. 
*/ + unsigned primgroup_size = 128; /* recommended without a GS */ + bool ia_switch_on_eop = false; + bool partial_vs_wave = false; + + if (rctx->gs_shader) + primgroup_size = 64; /* recommended with a GS */ + + if ((rctx->rasterizer && rctx->rasterizer->pa_sc_line_stipple) || + (rctx->b.screen->debug_flags & DBG_SWITCH_ON_EOP)) { + ia_switch_on_eop = true; + } + + if (rctx->b.streamout.streamout_enabled || + rctx->b.streamout.prims_gen_query_enabled) + partial_vs_wave = true; + + r600_write_context_reg(cs, CM_R_028AA8_IA_MULTI_VGT_PARAM, + S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | + S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | + S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1)); + } + /* On R6xx, CULL_FRONT=1 culls all points, lines, and rectangles, * even though it should have no effect on those. */ if (rctx->b.chip_class == R600 && rctx->rasterizer) { diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 8405fbb..17568ab 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -3747,6 +3747,17 @@ #define SQ_TEX_INST_SAMPLE_C_G_LB 0x1E #define SQ_TEX_INST_SAMPLE_C_G_LZ 0x1F +#define CM_R_028AA8_IA_MULTI_VGT_PARAM 0x028AA8 +#define S_028AA8_PRIMGROUP_SIZE(x) (((x) & 0xFFFF) << 0) +#define G_028AA8_PRIMGROUP_SIZE(x) (((x) >> 0) & 0xFFFF) +#define C_028AA8_PRIMGROUP_SIZE 0xFFFF0000 +#define S_028AA8_PARTIAL_VS_WAVE_ON(x) (((x) & 0x1) << 16) +#define G_028AA8_PARTIAL_VS_WAVE_ON(x) (((x) >> 16) & 0x1) +#define C_028AA8_PARTIAL_VS_WAVE_ON 0xFFFEFFFF +#define S_028AA8_SWITCH_ON_EOP(x) (((x) & 0x1) << 17) +#define G_028AA8_SWITCH_ON_EOP(x) (((x) >> 17) & 0x1) +#define C_028AA8_SWITCH_ON_EOP 0xFFFDFFFF + /* async DMA packets */ #define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \ (((t) & 0x1) << 23) | \ -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/4] radeonsi: set PARTIAL_VS_WAVE(0) when appropriate
From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_state_draw.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 2e999f6..f5d6550 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -389,6 +389,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, /* SWITCH_ON_EOP(0) is always preferable. */ bool wd_switch_on_eop = false; bool ia_switch_on_eop = false; + bool partial_vs_wave = false; /* This is a hardware requirement. */ if ((rs && rs->line_stipple_enable) || @@ -397,6 +398,10 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, wd_switch_on_eop = true; } + if (sctx->b.streamout.streamout_enabled || + sctx->b.streamout.prims_gen_query_enabled) + partial_vs_wave = true; + if (sctx->b.chip_class >= CIK) { /* WD_SWITCH_ON_EOP has no effect on GPUs with less than * 4 shader engines. Set 1 to pass the assertion below. @@ -421,7 +426,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, } return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | - S_028AA8_PARTIAL_VS_WAVE_ON(1) | + S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) | S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0); } -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v4 1/6] mesa: add ARB_conditional_render_inverted flags
Also add an extension bit so we can safely enable it. Signed-off-by: Tobias Klausmann <tobias.johannes.klausm...@mni.thm.de> --- src/mesa/main/condrender.c | 10 ++++++++-- src/mesa/main/extensions.c | 1 + src/mesa/main/mtypes.h | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/condrender.c b/src/mesa/main/condrender.c index 0ad1e5c2..90ae566 100644 --- a/src/mesa/main/condrender.c +++ b/src/mesa/main/condrender.c @@ -77,8 +77,14 @@ _mesa_BeginConditionalRender(GLuint queryId, GLenum mode) case GL_QUERY_NO_WAIT: case GL_QUERY_BY_REGION_WAIT: case GL_QUERY_BY_REGION_NO_WAIT: - /* OK */ - break; + break; /* OK */ + case GL_QUERY_WAIT_INVERTED: + case GL_QUERY_NO_WAIT_INVERTED: + case GL_QUERY_BY_REGION_WAIT_INVERTED: + case GL_QUERY_BY_REGION_NO_WAIT_INVERTED: + if (ctx->Extensions.ARB_conditional_render_inverted) + break; /* OK */ +/* fallthrough - invalid */ default: _mesa_error(ctx, GL_INVALID_ENUM, "glBeginConditionalRender(mode=%s)", _mesa_lookup_enum_by_nr(mode)); diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index c5bd7b3..553c01e 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -94,6 +94,7 @@ static const struct extension extension_table[] = { { "GL_ARB_color_buffer_float", o(ARB_color_buffer_float), GL, 2004 }, { "GL_ARB_compressed_texture_pixel_storage", o(dummy_true), GL, 2011 }, { "GL_ARB_compute_shader", o(ARB_compute_shader), GL, 2012 }, + { "GL_ARB_conditional_render_inverted", o(ARB_conditional_render_inverted), GL, 2014 }, { "GL_ARB_copy_buffer", o(dummy_true), GL, 2008 }, { "GL_ARB_copy_image", o(ARB_copy_image), GL, 2012 }, { "GL_ARB_conservative_depth", o(ARB_conservative_depth), GL, 2011 }, diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 97b1ad2..cb2a4df 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3553,6 +3553,7 @@ struct gl_extensions GLboolean ARB_clear_texture; GLboolean ARB_color_buffer_float; GLboolean ARB_compute_shader; + GLboolean 
ARB_conditional_render_inverted; GLboolean ARB_conservative_depth; GLboolean ARB_copy_image; GLboolean ARB_depth_buffer_float; -- 1.8.4.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v4 4/6] nvc0: Handle ARB_conditional_render_inverted and enable it
Signed-off-by: Tobias Klausmann <tobias.johannes.klausm...@mni.thm.de> --- src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 3 +- src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 61 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 3 +- src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 3 +- 4 files changed, 38 insertions(+), 32 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index ebeb8c4..8ae78e9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -196,8 +196,9 @@ struct nvc0_context { unsigned num_tfbbufs; struct pipe_query *cond_query; - boolean cond_cond; + boolean cond_cond; /* inverted rendering condition */ uint cond_mode; + uint32_t cond_condmode; /* the calculated condition */ struct nvc0_blitctx *blit; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index 50cef1e..007f8c4 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -542,46 +542,51 @@ nvc0_render_condition(struct pipe_context *pipe, struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_query *q; uint32_t cond; - boolean negated = FALSE; boolean wait = mode != PIPE_RENDER_COND_NO_WAIT && mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT; + if (!pq) { + cond = NVC0_3D_COND_MODE_ALWAYS; + } + else { + q = nvc0_query(pq); + /* NOTE: comparison of 2 queries only works if both have completed */ + switch (q->type) { + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + cond = condition ? NVC0_3D_COND_MODE_EQUAL : + NVC0_3D_COND_MODE_NOT_EQUAL; + wait = TRUE; + break; + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + if (likely(!condition)) { +if (unlikely(q->nesting)) + cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL : + NVC0_3D_COND_MODE_ALWAYS; +else + cond = NVC0_3D_COND_MODE_RES_NON_ZERO; + } else { +cond = wait ? 
NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS; + } + break; + default: + assert(!"render condition query not a predicate"); + cond = NVC0_3D_COND_MODE_ALWAYS; + break; + } + } + nvc0->cond_query = pq; nvc0->cond_cond = condition; + nvc0->cond_condmode = cond; nvc0->cond_mode = mode; if (!pq) { PUSH_SPACE(push, 1); - IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS); + IMMED_NVC0(push, NVC0_3D(COND_MODE), cond); return; } - q = nvc0_query(pq); - - /* NOTE: comparison of 2 queries only works if both have completed */ - switch (q->type) { - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - cond = negated ? NVC0_3D_COND_MODE_EQUAL : - NVC0_3D_COND_MODE_NOT_EQUAL; - wait = TRUE; - break; - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - if (likely(!negated)) { - if (unlikely(q->nesting)) -cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL : - NVC0_3D_COND_MODE_ALWAYS; - else -cond = NVC0_3D_COND_MODE_RES_NON_ZERO; - } else { - cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS; - } - break; - default: - assert(!"render condition query not a predicate"); - mode = NVC0_3D_COND_MODE_ALWAYS; - break; - } if (wait) nvc0_query_fifo_wait(push, pq); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 7c2f11a..84025ef 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -167,13 +167,12 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; case PIPE_CAP_COMPUTE: return (class_3d == NVE4_3D_CLASS) ? 
1 : 0; - case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: - return 0; /* unsupported caps */ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index a29f0cc..8aed43b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -1210,6 +1210,7 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) int64_t du_dx, dv_dy; int i; uint32_t mode; +
[Mesa-dev] [PATCH v4 3/6] mesa/st: Support ARB_conditional_render_inverted modes
Signed-off-by: Tobias Klausmann <tobias.johannes.klausm...@mni.thm.de> --- src/mesa/state_tracker/st_cb_condrender.c | 20 +++++++++++++++++++- src/mesa/state_tracker/st_extensions.c | 1 + 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_cb_condrender.c b/src/mesa/state_tracker/st_cb_condrender.c index 8776985..eff1341 100644 --- a/src/mesa/state_tracker/st_cb_condrender.c +++ b/src/mesa/state_tracker/st_cb_condrender.c @@ -55,6 +55,8 @@ st_BeginConditionalRender(struct gl_context *ctx, struct gl_query_object *q, struct st_query_object *stq = st_query_object(q); struct st_context *st = st_context(ctx); uint m; + /* Don't invert the condition for rendering by default */ + boolean invertedCond = FALSE; st_flush_bitmap_cache(st); @@ -71,12 +73,28 @@ st_BeginConditionalRender(struct gl_context *ctx, struct gl_query_object *q, case GL_QUERY_BY_REGION_NO_WAIT: m = PIPE_RENDER_COND_BY_REGION_NO_WAIT; break; + case GL_QUERY_WAIT_INVERTED: + m = PIPE_RENDER_COND_WAIT; + invertedCond = TRUE; + break; + case GL_QUERY_NO_WAIT_INVERTED: + m = PIPE_RENDER_COND_NO_WAIT; + invertedCond = TRUE; + break; + case GL_QUERY_BY_REGION_WAIT_INVERTED: + m = PIPE_RENDER_COND_BY_REGION_WAIT; + invertedCond = TRUE; + break; + case GL_QUERY_BY_REGION_NO_WAIT_INVERTED: + m = PIPE_RENDER_COND_BY_REGION_NO_WAIT; + invertedCond = TRUE; + break; default: assert(0 && "bad mode in st_BeginConditionalRender"); m = PIPE_RENDER_COND_WAIT; } - cso_set_render_condition(st->cso_context, stq->pq, FALSE, m); + cso_set_render_condition(st->cso_context, stq->pq, invertedCond, m); } diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 24e886c..4110eb5 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -460,6 +460,7 @@ void st_init_extensions(struct pipe_screen *screen, { o(ARB_sample_shading), PIPE_CAP_SAMPLE_SHADING }, { o(ARB_draw_indirect), PIPE_CAP_DRAW_INDIRECT }, { o(ARB_derivative_control), 
PIPE_CAP_TGSI_FS_FINE_DERIVATIVE }, + { o(ARB_conditional_render_inverted), PIPE_CAP_CONDITIONAL_RENDER_INVERTED }, }; /* Required: render target and sampler support */ -- 1.8.4.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev