Re: [Mesa-dev] [PATCH] R600/SI: Add compute support for CI v2
On Mit, 2013-10-23 at 23:04 -0400, Tom Stellard wrote: From: Tom Stellard thomas.stell...@amd.com v2: - Fix LDS size calculation Reviewed-by: Michel Dänzer michel.daen...@amd.com -- Earthling Michel Dänzer| http://www.amd.com Libre software enthusiast |Mesa and X developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] clover: fix build after a3ed98f7aa85636579a5696bf036ec13e5c9104a
link to similar bug: https://bugs.freedesktop.org/show_bug.cgi?id=70804 --- src/gallium/state_trackers/clover/core/kernel.cpp | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp index 10eb0e0..3bd08f0 100644 --- a/src/gallium/state_trackers/clover/core/kernel.cpp +++ b/src/gallium/state_trackers/clover/core/kernel.cpp @@ -81,8 +81,8 @@ kernel::launch(command_queue q, 0, exec.samplers.size(), exec.samplers.data()); - q.pipe-set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), - exec.sviews.data()); + q.pipe-set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0, + exec.sviews.size(), exec.sviews.data()); q.pipe-set_compute_resources(q.pipe, 0, exec.resources.size(), exec.resources.data()); q.pipe-set_global_binding(q.pipe, 0, exec.g_buffers.size(), @@ -96,7 +96,8 @@ kernel::launch(command_queue q, q.pipe-set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL); q.pipe-set_compute_resources(q.pipe, 0, exec.resources.size(), NULL); - q.pipe-set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL); + q.pipe-set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0, + exec.sviews.size(), NULL); q.pipe-bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0, exec.samplers.size(), NULL); exec.unbind(); -- 1.8.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] clover: mark gcc 4.7.0 as broken
From e004b63b2896ac2c8951cfda49d4a54d72b16449 Mon Sep 17 00:00:00 2001 From: David Heidelberger david.heidelber...@ixit.cz Date: Wed, 23 Oct 2013 23:35:42 +0200 Subject: [PATCH] clover: mark gcc 4.7.0 as broken --- configure.ac | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index dc15ad4..ea6f2f3 100644 --- a/configure.ac +++ b/configure.ac @@ -109,6 +109,7 @@ if test x$GCC = xyes -a x$acv_mesa_CLANG = xno; then if test $? -eq 0; then GCC_VERSION_MAJOR=`echo $GCC_VERSION | cut -d. -f1` GCC_VERSION_MINOR=`echo $GCC_VERSION | cut -d. -f2` +GCC_VERSION_MAINTENANCE=`echo $GCC_VERSION | cut -d. -f3` fi if test $GCC_VERSION_MAJOR -lt 3 -o $GCC_VERSION_MAJOR -eq 3 -a $GCC_VERSION_MINOR -lt 3 ; then @@ -1371,8 +1372,9 @@ if test x$enable_opencl = xyes; then AC_MSG_ERROR([cannot enable OpenCL without Gallium]) fi -if test $GCC_VERSION_MAJOR -lt 4 -o $GCC_VERSION_MAJOR -eq 4 -a $GCC_VERSION_MINOR -lt 7; then -AC_MSG_ERROR([gcc = 4.7 is required to build clover]) +if test $GCC_VERSION_MAJOR -lt 4 -o \ +$GCC_VERSION_MAJOR -eq 4 -a $GCC_VERSION_MINOR -lt 7 -a $GCC_VERSION_MAINTENANCE -lt 1; then +AC_MSG_ERROR([gcc = 4.7.1 is required to build clover]) fi if test x$have_libclc = xno; then -- 1.8.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Continuous Integration
Hello everyone. I was reading through the Help Wanted page and I was wondering if 6. Automatic Testing still needed to be done. The three continuous integration systems currently I'm looking at are : • Jenkins • Buildbot • Travis-CI The first two require a dedicated build server AFAIK, while Travis is hosted but requires the code to be on github with a travis.yml file in the repository's root directory. Let me know what you think. -Yomi ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 70823] while configuring the Mesa -9.2.2 file error will be there in C++ preprocessor .. please suggest me what to do..
https://bugs.freedesktop.org/show_bug.cgi?id=70823 Kenneth Graunke kenn...@whitecape.org changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |INVALID --- Comment #1 from Kenneth Graunke kenn...@whitecape.org --- This isn't a bug. Feel free to ask for help building Mesa on IRC or one of the mailing lists. As is, you haven't given us any information we'd need to be able to help you (like the error message), so there's not much we can do. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Update MESA_INFO to eliminate error
On 10/23/2013 12:41 PM, Courtney Goeltzenleuchter wrote: If a user set MESA_INFO and the OpenGL application uses a 3.0 or later context then the MESA_INFO debug output will have an error when it queries for extensions using the deprecated enum GL_EXTENSIONS. Passing context argument allows code to return extension list directly regardless of profile. --- src/mesa/main/context.c | 2 +- src/mesa/main/debug.c | 10 +++--- src/mesa/main/debug.h | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) This isn't in i965 driver code, so the proper prefix would be mesa:. Otherwise, it looks good. With that change, Reviewed-by: Kenneth Graunke kenn...@whitecape.org diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 0d1f71c..8218153 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1522,7 +1522,7 @@ _mesa_make_current( struct gl_context *newCtx, * information. */ if (_mesa_getenv(MESA_INFO)) { - _mesa_print_info(); + _mesa_print_info(newCtx); } newCtx-FirstTimeCurrent = GL_FALSE; diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c index 9434c1e..99b2147 100644 --- a/src/mesa/main/debug.c +++ b/src/mesa/main/debug.c @@ -103,7 +103,7 @@ _mesa_print_state( const char *msg, GLuint state ) /** * Print information about this Mesa version and build options. */ -void _mesa_print_info( void ) +void _mesa_print_info( struct gl_context *ctx ) { _mesa_debug(NULL, Mesa GL_VERSION = %s\n, (char *) _mesa_GetString(GL_VERSION)); @@ -111,8 +111,12 @@ void _mesa_print_info( void ) (char *) _mesa_GetString(GL_RENDERER)); _mesa_debug(NULL, Mesa GL_VENDOR = %s\n, (char *) _mesa_GetString(GL_VENDOR)); - _mesa_debug(NULL, Mesa GL_EXTENSIONS = %s\n, -(char *) _mesa_GetString(GL_EXTENSIONS)); + + /* use ctx as GL_EXTENSIONS will not work on 3.0 or higher +* core contexts. 
+*/ + _mesa_debug(NULL, Mesa GL_EXTENSIONS = %s\n, ctx-Extensions.String); + #if defined(THREADS) _mesa_debug(NULL, Mesa thread-safe: YES\n); #else diff --git a/src/mesa/main/debug.h b/src/mesa/main/debug.h index 8414c5e..902f595 100644 --- a/src/mesa/main/debug.h +++ b/src/mesa/main/debug.h @@ -43,7 +43,7 @@ struct gl_texture_image; extern void _mesa_print_enable_flags( const char *msg, GLuint flags ); extern void _mesa_print_state( const char *msg, GLuint state ); -extern void _mesa_print_info( void ); +extern void _mesa_print_info( struct gl_context *ctx ); extern void _mesa_init_debug( struct gl_context *ctx ); extern void ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl/gs: Fix transform feedback of gl_ClipDistance.
On Wed, Oct 23, 2013 at 01:08:42PM -0700, Paul Berry wrote: Since gl_ClipDistance is lowered from an array of floats to an array of vec4's during compilation, transform feedback has special logic to keep track of the pre-lowered array size so that attempting to perform transform feedback on gl_ClipDistance produces a result with the correct size. Previously, this special logic always consulted the vertex shader's size for gl_ClipDistance. This patch fixes it so that it uses the geometry shader's size for gl_ClipDistance when a geometry shader is in use. Fixes piglit test spec/glsl-1.50/transform-feedback-type-and-size. --- src/glsl/link_varyings.cpp | 2 +- src/glsl/linker.cpp| 2 ++ src/mesa/main/mtypes.h | 6 ++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp index 4ba6d8a..c503645 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -328,7 +328,7 @@ tfeedback_decl::assign_location(struct gl_context *ctx, const unsigned vector_elements = this-matched_candidate-type-fields.array-vector_elements; unsigned actual_array_size = this-is_clip_distance_mesa ? 
- prog-Vert.ClipDistanceArraySize : + prog-LastClipDistanceArraySize : this-matched_candidate-type-array_size(); if (this-is_subscripted) { diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index b23c31a..d8f655c 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2100,6 +2100,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) validate_vertex_shader_executable(prog, sh); if (!prog-LinkStatus) goto done; + prog-LastClipDistanceArraySize = prog-Vert.ClipDistanceArraySize; _mesa_reference_shader(ctx, prog-_LinkedShaders[MESA_SHADER_VERTEX], sh); @@ -2132,6 +2133,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) validate_geometry_shader_executable(prog, sh); if (!prog-LinkStatus) goto done; + prog-LastClipDistanceArraySize = prog-Geom.ClipDistanceArraySize; _mesa_reference_shader(ctx, prog-_LinkedShaders[MESA_SHADER_GEOMETRY], sh); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 6374e8c..bc7dea4 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2476,6 +2476,12 @@ struct gl_shader_program unsigned NumUserUniformStorage; struct gl_uniform_storage *UniformStorage; + /** +* Size of the gl_ClipDistance array that is output from the last pipeline +* stage before the geometry shader. Can you explain the before the geometry shader part? This is used by the transform feedback mechanism and hence represents the size of the varying output by the geometry shader (if present of course), right? This comment in turn refers to the varying output by vertex shader and _read_ by geometry shader. +*/ + GLuint LastClipDistanceArraySize; + struct gl_uniform_block *UniformBlocks; unsigned NumUniformBlocks; -- 1.8.4.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Continuous Integration
On Thu, Oct 24, 2013 at 02:55:36AM -0400, Yomi Ogunwumi wrote: Hello everyone. I was reading through the Help Wanted page and I was wondering if 6. Automatic Testing still needed to be done. The three continuous integration systems currently I'm looking at are : • Jenkins • Buildbot • Travis-CI I think this would be great. I'm mostly familiar with buildbot, but I don't really have any preference for which system is used. If you can get something set up and running locally, maybe we can look at moving it to an fdo hosted build server. -Tom The first two require a dedicated build server AFAIK, while Travis is hosted but requires the code to be on github with a travis.yml file in the repository's root directory. Let me know what you think. -Yomi ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/4] GL_OES_get_program_binary extension
Hello; These patches introduce GL_OES_get_program_binary extension support for Mesa. There are already stub functions for this extension, patches add the missing functionality part. This is based on the 'more automatic' shader cache work I've been implementing. I wanted to implement this first as this is a standard for applications to use and the automatic cache can be built separately based on these same enablers. As well as code review I would also appreciate any testing efforts with this. I've tested this with my own test apps but as you can imagine the coverage ain't that big. I'm also thinking of building piglit test cases to exercise cache shader but that is still on planning stage. The spec for extension is here: http://www.khronos.org/registry/gles/extensions/OES/OES_get_program_binary.txt Tapani Pälli (4): glsl: export populate_symbol_table mesa: iterate method for string_to_uint_map glsl: add ir_cache class and functions for shader serialization mesa: OES_get_program_binary functionality src/glsl/Makefile.sources |2 + src/glsl/ir_cache.h | 691 + src/glsl/ir_cache_serialize.cpp | 967 src/glsl/ir_cache_unserialize.cpp | 1508 + src/glsl/linker.cpp |2 +- src/glsl/linker.h |3 + src/mesa/main/shaderapi.c | 46 +- src/mesa/program/hash_table.h |8 + 8 files changed, 3219 insertions(+), 8 deletions(-) create mode 100644 src/glsl/ir_cache.h create mode 100644 src/glsl/ir_cache_serialize.cpp create mode 100644 src/glsl/ir_cache_unserialize.cpp -- 1.8.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/4] glsl: export populate_symbol_table
shader cache needs this to allocate and fill gl_shader symbol table Signed-off-by: Tapani Pälli tapani.pa...@intel.com --- src/glsl/linker.cpp | 2 +- src/glsl/linker.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index b23c31a..63127ff 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -844,7 +844,7 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog) /** * Populates a shaders symbol table with all global declarations */ -static void +void populate_symbol_table(gl_shader *sh) { sh-symbols = new(sh) glsl_symbol_table; diff --git a/src/glsl/linker.h b/src/glsl/linker.h index 887cd33..c80c0c8 100644 --- a/src/glsl/linker.h +++ b/src/glsl/linker.h @@ -26,6 +26,9 @@ #ifndef GLSL_LINKER_H #define GLSL_LINKER_H +void +populate_symbol_table(gl_shader *sh); + extern bool link_function_calls(gl_shader_program *prog, gl_shader *main, gl_shader **shader_list, unsigned num_shaders); -- 1.8.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/4] mesa: iterate method for string_to_uint_map
shader cache requires this to be able to cache gl_shader_program Signed-off-by: Tapani Pälli tapani.pa...@intel.com --- src/mesa/program/hash_table.h | 8 1 file changed, 8 insertions(+) diff --git a/src/mesa/program/hash_table.h b/src/mesa/program/hash_table.h index e95fc49..ece43a1 100644 --- a/src/mesa/program/hash_table.h +++ b/src/mesa/program/hash_table.h @@ -229,6 +229,14 @@ public: } /** +* Runs a passed callback for the hash +*/ + void iterate(void (*func)(const void *, void *, void *), void *closure) + { + hash_table_call_foreach(this-ht, func, closure); + } + + /** * Get the value associated with a particular key * * \return -- 1.8.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/4] mesa: OES_get_program_binary functionality
Signed-off-by: Tapani Pälli tapani.pa...@intel.com --- src/mesa/main/shaderapi.c | 46 +++--- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index d3677c8..3ce7ea4 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -57,6 +57,8 @@ #include ../glsl/ir.h #include ../glsl/ir_uniform.h #include ../glsl/program.h +#include ../glsl/ir_cache.h +#include git_sha1.h /** Define this to enable shader substitution (see below) */ #define SHADER_SUBST 0 @@ -212,7 +214,6 @@ attach_shader(struct gl_context *ctx, GLuint program, GLuint shader) struct gl_shader_program *shProg; struct gl_shader *sh; GLuint i, n; - const bool same_type_disallowed = _mesa_is_gles(ctx); shProg = _mesa_lookup_shader_program_err(ctx, program, glAttachShader); @@ -1597,8 +1598,26 @@ _mesa_GetProgramBinary(GLuint program, GLsizei bufSize, GLsizei *length, if (length != NULL) *length = 0; - (void) binaryFormat; - (void) binary; + size_t size = 0; + char *data = _mesa_program_serialize(shProg, size, MESA_GIT_SHA1); + + /* we have more data that can fit to user given buffer */ + if (size bufSize) { + _mesa_error(ctx, GL_INVALID_OPERATION, __FUNCTION__); + if (data) + free(data); + return; + } + + if (data) { + memcpy(binary, data, size); + free(data); + } + + if (length != NULL) + *length = size; + + *binaryFormat = 0; } void GLAPIENTRY @@ -1612,10 +1631,23 @@ _mesa_ProgramBinary(GLuint program, GLenum binaryFormat, if (!shProg) return; - (void) binaryFormat; - (void) binary; - (void) length; - _mesa_error(ctx, GL_INVALID_OPERATION, __FUNCTION__); + if (length = 0) + return; + + /* free possible existing data and initialize structure */ + _mesa_free_shader_program_data(ctx, shProg); + _mesa_init_shader_program(ctx, shProg); + + /* fill structure from a binary blob */ + if (_mesa_program_unserialize(shProg, binary, length, MESA_GIT_SHA1)) { + _mesa_error(ctx, GL_INVALID_VALUE, glProgramBinary(binary incompatible)); + 
return; + } + + /* driver specific link, optimizations and what not */ + ctx-Driver.LinkShader(ctx, shProg); + + _mesa_ValidateProgram(program); } -- 1.8.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] glsl: Optimize (not A) or (not B) into not (A and B).
On Thu, Oct 24, 2013 at 2:19 AM, Matt Turner matts...@gmail.com wrote: A few Serious Sam 3 shaders affected: instructions in affected programs: 4384 - 4344 (-0.91%) --- src/glsl/opt_algebraic.cpp | 12 1 file changed, 12 insertions(+) diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index 37b2f02..3bf0689 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -32,8 +32,11 @@ #include ir_visitor.h #include ir_rvalue_visitor.h #include ir_optimization.h +#include ir_builder.h #include glsl_types.h +using namespace ir_builder; + namespace { /** @@ -436,6 +439,15 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) this-progress = true; return new(mem_ctx) ir_constant(ir-type, data); + } else if (op_expr[0] op_expr[0]-operation == ir_unop_logic_not + op_expr[1] op_expr[1]-operation == ir_unop_logic_not) { + /* De Morgan's Law: + *(not A) or (not B) === not (A and B) + */ + temp = logic_not(logic_and(op_expr[0]-operands[0], +op_expr[1]-operands[0])); + return swizzle_if_required(ir, temp); + this-progress = true; Returning before reporting progress? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] glsl: Optimize (not A) and (not B) into not (A or B).
On Thu, Oct 24, 2013 at 2:19 AM, Matt Turner matts...@gmail.com wrote: No shader-db changes, but seems like a good idea. --- src/glsl/opt_algebraic.cpp | 9 + 1 file changed, 9 insertions(+) diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index 3bf0689..1ce9e2d 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -401,6 +401,15 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { this-progress = true; return ir_constant::zero(mem_ctx, ir-type); + } else if (op_expr[0] op_expr[0]-operation == ir_unop_logic_not + op_expr[1] op_expr[1]-operation == ir_unop_logic_not) { + /* De Morgan's Law: + *(not A) and (not B) === not (A or B) + */ + temp = logic_not(logic_or(op_expr[0]-operands[0], + op_expr[1]-operands[0])); + return swizzle_if_required(ir, temp); + this-progress = true; Same here, returning before updating progress? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl/gs: Fix transform feedback of gl_ClipDistance.
On 24 October 2013 00:13, Pohjolainen, Topi topi.pohjolai...@intel.comwrote: On Wed, Oct 23, 2013 at 01:08:42PM -0700, Paul Berry wrote: Since gl_ClipDistance is lowered from an array of floats to an array of vec4's during compilation, transform feedback has special logic to keep track of the pre-lowered array size so that attempting to perform transform feedback on gl_ClipDistance produces a result with the correct size. Previously, this special logic always consulted the vertex shader's size for gl_ClipDistance. This patch fixes it so that it uses the geometry shader's size for gl_ClipDistance when a geometry shader is in use. Fixes piglit test spec/glsl-1.50/transform-feedback-type-and-size. --- src/glsl/link_varyings.cpp | 2 +- src/glsl/linker.cpp| 2 ++ src/mesa/main/mtypes.h | 6 ++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp index 4ba6d8a..c503645 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -328,7 +328,7 @@ tfeedback_decl::assign_location(struct gl_context *ctx, const unsigned vector_elements = this-matched_candidate-type-fields.array-vector_elements; unsigned actual_array_size = this-is_clip_distance_mesa ? 
- prog-Vert.ClipDistanceArraySize : + prog-LastClipDistanceArraySize : this-matched_candidate-type-array_size(); if (this-is_subscripted) { diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index b23c31a..d8f655c 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2100,6 +2100,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) validate_vertex_shader_executable(prog, sh); if (!prog-LinkStatus) goto done; + prog-LastClipDistanceArraySize = prog-Vert.ClipDistanceArraySize; _mesa_reference_shader(ctx, prog-_LinkedShaders[MESA_SHADER_VERTEX], sh); @@ -2132,6 +2133,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) validate_geometry_shader_executable(prog, sh); if (!prog-LinkStatus) goto done; + prog-LastClipDistanceArraySize = prog-Geom.ClipDistanceArraySize; _mesa_reference_shader(ctx, prog-_LinkedShaders[MESA_SHADER_GEOMETRY], sh); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 6374e8c..bc7dea4 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2476,6 +2476,12 @@ struct gl_shader_program unsigned NumUserUniformStorage; struct gl_uniform_storage *UniformStorage; + /** +* Size of the gl_ClipDistance array that is output from the last pipeline +* stage before the geometry shader. Can you explain the before the geometry shader part? This is used by the transform feedback mechanism and hence represents the size of the varying output by the geometry shader (if present of course), right? This comment in turn refers to the varying output by vertex shader and _read_ by geometry shader. Oops. I meant to say ...output from the last pipeline stage before the fragment shader. Is that clearer? In other words, for now, it's the geometry shader (if present), and the vertex shader otherwise. But in the future, when we add tessellation shaders, it'll be: - the size of gl_ClipDistance output by the geometry shader, if present. 
- Otherwise, the size of gl_ClipDistance output by the tessellation evaluation shader, if present. - Otherwise, the size of gl_ClipDistance output by the tessellation control shader, if present. - Otherwise, the size of gl_ClipDistance output by the vertex shader. +*/ + GLuint LastClipDistanceArraySize; + struct gl_uniform_block *UniformBlocks; unsigned NumUniformBlocks; -- 1.8.4.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/7] vl/vlc: add remove bits function
From: Christian König christian.koe...@amd.com Signed-off-by: Christian König christian.koe...@amd.com --- src/gallium/auxiliary/vl/vl_vlc.h | 12 1 file changed, 12 insertions(+) diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h index 0dc1df9..9f67ba9 100644 --- a/src/gallium/auxiliary/vl/vl_vlc.h +++ b/src/gallium/auxiliary/vl/vl_vlc.h @@ -338,4 +338,16 @@ vl_vlc_search_byte(struct vl_vlc *vlc, unsigned num_bits, uint8_t value) } } +/** + * remove num_bits bits starting at pos from the bitbuffer + */ +static INLINE void +vl_vlc_removebits(struct vl_vlc *vlc, unsigned pos, unsigned num_bits) +{ + uint64_t lo = (vlc-buffer (~0UL (pos + num_bits))) num_bits; + uint64_t hi = (vlc-buffer (~0UL (64 - pos))); + vlc-buffer = lo | hi; + vlc-invalid_bits += num_bits; +} + #endif /* vl_vlc_h */ -- 1.8.1.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] OpenMAX state tracker
Hello list, the following patch set implements an initial OpenMAX state tracker. Supported are hardware accelerated decoding of both MPEG2 and H264 using UVD on modern radeon chips. In contrast to our VDPAU implementation the H264 decoder supports Hi10P as well, but on the other hand probably contains a really large amount of bugs. As always comments are very welcome, Christian. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/7] vl/h264: split fields into SPS/PPS
From: Christian König christian.koe...@amd.com Add alot of missing fields as well. Signed-off-by: Christian König christian.koe...@amd.com --- .../drivers/nouveau/nouveau_vp3_video_bsp.c| 30 src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c | 24 +++--- src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c | 38 +- src/gallium/drivers/nouveau/nv50/nv84_video_vp.c | 8 +- src/gallium/drivers/radeon/radeon_uvd.c| 59 +++ src/gallium/include/pipe/p_video_state.h | 88 -- src/gallium/state_trackers/vdpau/decode.c | 47 ++-- 7 files changed, 169 insertions(+), 125 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c index ba2a917..6d968c1 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c @@ -204,25 +204,25 @@ nouveau_vp3_fill_picparm_h264_bsp(struct nouveau_vp3_decoder *dec, h-unk00 = 1; h-pad1 = h-pad2 = 0; h-unk = 0; - h-log2_max_frame_num_minus4 = d-log2_max_frame_num_minus4; - h-frame_mbs_only_flag = d-frame_mbs_only_flag; - h-direct_8x8_inference_flag = d-direct_8x8_inference_flag; + h-log2_max_frame_num_minus4 = d-pps-sps-log2_max_frame_num_minus4; + h-frame_mbs_only_flag = d-pps-sps-frame_mbs_only_flag; + h-direct_8x8_inference_flag = d-pps-sps-direct_8x8_inference_flag; h-width_mb = mb(dec-base.width); h-height_mb = mb(dec-base.height); - h-entropy_coding_mode_flag = d-entropy_coding_mode_flag; - h-pic_order_present_flag = d-pic_order_present_flag; - h-pic_order_cnt_type = d-pic_order_cnt_type; - h-log2_max_pic_order_cnt_lsb_minus4 = d-log2_max_pic_order_cnt_lsb_minus4; - h-delta_pic_order_always_zero_flag = d-delta_pic_order_always_zero_flag; + h-entropy_coding_mode_flag = d-pps-entropy_coding_mode_flag; + h-pic_order_present_flag = d-pps-bottom_field_pic_order_in_frame_present_flag; + h-pic_order_cnt_type = d-pps-sps-pic_order_cnt_type; + h-log2_max_pic_order_cnt_lsb_minus4 = d-pps-sps-log2_max_pic_order_cnt_lsb_minus4; 
+ h-delta_pic_order_always_zero_flag = d-pps-sps-delta_pic_order_always_zero_flag; h-num_ref_idx_l0_active_minus1 = d-num_ref_idx_l0_active_minus1; h-num_ref_idx_l1_active_minus1 = d-num_ref_idx_l1_active_minus1; - h-weighted_pred_flag = d-weighted_pred_flag; - h-weighted_bipred_idc = d-weighted_bipred_idc; - h-pic_init_qp_minus26 = d-pic_init_qp_minus26; - h-deblocking_filter_control_present_flag = d-deblocking_filter_control_present_flag; - h-redundant_pic_cnt_present_flag = d-redundant_pic_cnt_present_flag; - h-transform_8x8_mode_flag = d-transform_8x8_mode_flag; - h-mb_adaptive_frame_field_flag = d-mb_adaptive_frame_field_flag; + h-weighted_pred_flag = d-pps-weighted_pred_flag; + h-weighted_bipred_idc = d-pps-weighted_bipred_idc; + h-pic_init_qp_minus26 = d-pps-pic_init_qp_minus26; + h-deblocking_filter_control_present_flag = d-pps-deblocking_filter_control_present_flag; + h-redundant_pic_cnt_present_flag = d-pps-redundant_pic_cnt_present_flag; + h-transform_8x8_mode_flag = d-pps-transform_8x8_mode_flag; + h-mb_adaptive_frame_field_flag = d-pps-sps-mb_adaptive_frame_field_flag; h-field_pic_flag = d-field_pic_flag; h-bottom_field_flag = d-bottom_field_flag; memset(h-real_pad, 0, sizeof(h-real_pad)); diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c index add998d..a0f5332 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c @@ -340,22 +340,22 @@ nouveau_vp3_fill_picparm_h264_vp(struct nouveau_vp3_decoder *dec, nouveau_vp3_inter_sizes(dec, 1, ring, h-bucket_size, h-inter_ring_data_size); h-u220 = 0; - h-f0 = d-mb_adaptive_frame_field_flag; - h-f1 = d-direct_8x8_inference_flag; - h-weighted_pred_flag = d-weighted_pred_flag; - h-f3 = d-constrained_intra_pred_flag; + h-f0 = d-pps-sps-mb_adaptive_frame_field_flag; + h-f1 = d-pps-sps-direct_8x8_inference_flag; + h-weighted_pred_flag = d-pps-weighted_pred_flag; + h-f3 = 
d-pps-constrained_intra_pred_flag; h-is_reference = d-is_reference; h-interlace = d-field_pic_flag; h-bottom_field_flag = d-bottom_field_flag; h-f7 = 0; // TODO: figure out when set.. - h-log2_max_frame_num_minus4 = d-log2_max_frame_num_minus4; + h-log2_max_frame_num_minus4 = d-pps-sps-log2_max_frame_num_minus4; h-u31_45 = 1; - h-pic_order_cnt_type = d-pic_order_cnt_type; - h-pic_init_qp_minus26 = d-pic_init_qp_minus26; - h-chroma_qp_index_offset = d-chroma_qp_index_offset; - h-second_chroma_qp_index_offset = d-second_chroma_qp_index_offset; - h-weighted_bipred_idc = d-weighted_bipred_idc; + h-pic_order_cnt_type = d-pps-sps-pic_order_cnt_type; + h-pic_init_qp_minus26 = d-pps-pic_init_qp_minus26; +
[Mesa-dev] [PATCH 2/7] radeon/uvd: fix H264 chroma format handling
From: Christian König christian.koe...@amd.com Signed-off-by: Christian König christian.koe...@amd.com --- src/gallium/drivers/radeon/radeon_uvd.c | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 134ce4e..6d87841 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -385,6 +385,21 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_ result.sps_info_flags |= pic-frame_mbs_only_flag 2; result.sps_info_flags |= pic-delta_pic_order_always_zero_flag 3; + switch (dec-base.chroma_format) { + case PIPE_VIDEO_CHROMA_FORMAT_400: + result.chroma_format = 0; + break; + case PIPE_VIDEO_CHROMA_FORMAT_420: + result.chroma_format = 1; + break; + case PIPE_VIDEO_CHROMA_FORMAT_422: + result.chroma_format = 2; + break; + case PIPE_VIDEO_CHROMA_FORMAT_444: + result.chroma_format = 3; + break; + } + result.pps_info_flags = 0; result.pps_info_flags |= pic-transform_8x8_mode_flag 0; result.pps_info_flags |= pic-redundant_pic_cnt_present_flag 1; @@ -395,7 +410,6 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_ result.pps_info_flags |= pic-pic_order_present_flag 7; result.pps_info_flags |= pic-entropy_coding_mode_flag 8; - result.chroma_format = 0x1; result.bit_depth_luma_minus8 = 0; result.bit_depth_chroma_minus8 = 0; -- 1.8.1.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/7] vl/vlc: add function to limit the vlc size
From: Christian König christian.koe...@amd.com Signed-off-by: Christian König christian.koe...@amd.com --- src/gallium/auxiliary/vl/vl_vlc.h | 53 ++- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h index 9f67ba9..ea4f93d 100644 --- a/src/gallium/auxiliary/vl/vl_vlc.h +++ b/src/gallium/auxiliary/vl/vl_vlc.h @@ -45,7 +45,6 @@ struct vl_vlc const uint8_t *data; const uint8_t *end; - unsigned num_inputs; const void *const *inputs; const unsigned*sizes; unsigned bytes_left; @@ -94,14 +93,18 @@ vl_vlc_next_input(struct vl_vlc *vlc) unsigned len = vlc-sizes[0]; assert(vlc); - assert(vlc-num_inputs); + assert(vlc-bytes_left); - vlc-bytes_left -= len; + if (len vlc-bytes_left) + vlc-bytes_left -= len; + else { + len = vlc-bytes_left; + vlc-bytes_left = 0; + } vlc-data = vlc-inputs[0]; vlc-end = vlc-data + len; - --vlc-num_inputs; ++vlc-inputs; ++vlc-sizes; } @@ -135,10 +138,11 @@ vl_vlc_fillbits(struct vl_vlc *vlc) /* if this input is depleted */ if (bytes_left == 0) { - if (vlc-num_inputs) + if (vlc-bytes_left) { /* go on to next input */ vl_vlc_next_input(vlc); - else +vl_vlc_align_data_ptr(vlc); + } else /* or give up since we don't have anymore inputs */ return; @@ -182,7 +186,6 @@ vl_vlc_init(struct vl_vlc *vlc, unsigned num_inputs, vlc-buffer = 0; vlc-invalid_bits = 32; - vlc-num_inputs = num_inputs; vlc-inputs = inputs; vlc-sizes = sizes; vlc-bytes_left = 0; @@ -190,10 +193,11 @@ vl_vlc_init(struct vl_vlc *vlc, unsigned num_inputs, for (i = 0; i num_inputs; ++i) vlc-bytes_left += sizes[i]; - vl_vlc_next_input(vlc); - vl_vlc_align_data_ptr(vlc); - vl_vlc_fillbits(vlc); - vl_vlc_fillbits(vlc); + if (vlc-bytes_left) { + vl_vlc_next_input(vlc); + vl_vlc_align_data_ptr(vlc); + vl_vlc_fillbits(vlc); + } } /** @@ -313,7 +317,7 @@ vl_vlc_search_byte(struct vl_vlc *vlc, unsigned num_bits, uint8_t value) /* if this input is depleted */ if (vlc-data == vlc-end) { - if (vlc-num_inputs) + if 
(vlc-bytes_left) /* go on to next input */ vl_vlc_next_input(vlc); else @@ -350,4 +354,29 @@ vl_vlc_removebits(struct vl_vlc *vlc, unsigned pos, unsigned num_bits) vlc-invalid_bits += num_bits; } +/** + * limit the number of bits left for fetching + */ +static INLINE void +vl_vlc_limit(struct vl_vlc *vlc, unsigned bits_left) +{ + assert(bits_left = vl_vlc_bits_left(vlc)); + + vl_vlc_fillbits(vlc); + if (bits_left vl_vlc_valid_bits(vlc)) { + vlc-invalid_bits = 32 - bits_left; + vlc-buffer = ~0L (vlc-invalid_bits + 32); + vlc-end = vlc-data; + vlc-bytes_left = 0; + } else { + assert((bits_left - vl_vlc_valid_bits(vlc)) % 8 == 0); + vlc-bytes_left = (bits_left - vl_vlc_valid_bits(vlc)) / 8; + if (vlc-bytes_left (vlc-end - vlc-data)) { + vlc-end = vlc-data + vlc-bytes_left; + vlc-bytes_left = 0; + } else + vlc-bytes_left -= vlc-end - vlc-data; + } +} + #endif /* vl_vlc_h */ -- 1.8.1.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/7] vl: add 400 chroma format as well
From: Christian König christian.koe...@amd.com Signed-off-by: Christian König christian.koe...@amd.com --- src/gallium/include/pipe/p_format.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index b82f08f..461d38f 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -368,6 +368,7 @@ enum pipe_format { enum pipe_video_chroma_format { + PIPE_VIDEO_CHROMA_FORMAT_400, PIPE_VIDEO_CHROMA_FORMAT_420, PIPE_VIDEO_CHROMA_FORMAT_422, PIPE_VIDEO_CHROMA_FORMAT_444 -- 1.8.1.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/7] vl/rbsp: add H.264 RBSP implementation
From: Christian König christian.koe...@amd.com Signed-off-by: Christian König christian.koe...@amd.com --- src/gallium/auxiliary/vl/vl_rbsp.h | 164 + 1 file changed, 164 insertions(+) create mode 100644 src/gallium/auxiliary/vl/vl_rbsp.h diff --git a/src/gallium/auxiliary/vl/vl_rbsp.h b/src/gallium/auxiliary/vl/vl_rbsp.h new file mode 100644 index 000..2e3da8e --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_rbsp.h @@ -0,0 +1,164 @@ +/** + * + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * Software), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **/ + +/* + * Authors: + * Christian König christian.koe...@amd.com + * + */ + +/* + * Functions for reading the raw byte sequence payload of H.264 + */ + +#ifndef vl_rbsp_h +#define vl_rbsp_h + +#include vl/vl_vlc.h + +struct vl_rbsp { + struct vl_vlc nal; + unsigned escaped; +}; + +/** + * Initialize the RBSP object + */ +static INLINE void vl_rbsp_init(struct vl_rbsp *rbsp, struct vl_vlc *nal, unsigned num_bits) +{ + unsigned bits_left = vl_vlc_bits_left(nal); + + /* copy the position */ + rbsp-nal = *nal; + + rbsp-escaped = 0; + + /* search for the end of the NAL unit */ + while (vl_vlc_search_byte(nal, num_bits, 0x00)) { + if (vl_vlc_peekbits(nal, 24) == 0x01 || + vl_vlc_peekbits(nal, 32) == 0x0001) { + vl_vlc_limit(rbsp-nal, bits_left - vl_vlc_bits_left(nal)); + return; + } + vl_vlc_eatbits(nal, 8); + } +} + +/** + * Make at least 16 more bits available + */ +static INLINE void vl_rbsp_fillbits(struct vl_rbsp *rbsp) +{ + unsigned valid = vl_vlc_valid_bits(rbsp-nal); + unsigned i, bits; + + /* abort if we still have enough bits */ + if (valid = 32) + return; + + vl_vlc_fillbits(rbsp-nal); + + /* abort if we have less than 24 bits left in this nal */ + if (vl_vlc_bits_left(rbsp-nal) 24) + return; + + /* check that we have enough bits left from the last fillbits */ + assert(valid = rbsp-escaped); + + /* handle the already escaped bits */ + valid -= rbsp-escaped; + + /* search for the emulation prevention three byte */ + rbsp-escaped = 16; + bits = vl_vlc_valid_bits(rbsp-nal); + for (i = valid + 24; i = bits; i += 8) { + if ((vl_vlc_peekbits(rbsp-nal, i) 0xff) == 0x3) { + vl_vlc_removebits(rbsp-nal, i - 8, 8); + rbsp-escaped = bits - i; + bits -= 8; + i += 8; + } + } +} + +/** + * Return an unsigned integer from the first n bits + */ +static INLINE unsigned vl_rbsp_u(struct vl_rbsp *rbsp, unsigned n) +{ + if (n == 0) + return 0; + + vl_rbsp_fillbits(rbsp); + return vl_vlc_get_uimsbf(rbsp-nal, n); +} + +/** + * Return an unsigned exponential Golomb encoded 
integer + */ +static INLINE unsigned vl_rbsp_ue(struct vl_rbsp *rbsp) +{ + unsigned bits = 0; + + vl_rbsp_fillbits(rbsp); + while (!vl_vlc_get_uimsbf(rbsp-nal, 1)) + ++bits; + + return (1 bits) - 1 + vl_rbsp_u(rbsp, bits); +} + +/** + * Return an signed exponential Golomb encoded integer + */ +static INLINE signed vl_rbsp_se(struct vl_rbsp *rbsp) +{ + signed codeNum = vl_rbsp_ue(rbsp); + if (codeNum 1) + return (codeNum + 1) 1; + else + return -(codeNum 1); +} + +/** + * Are more data available in the RBSP ? + */ +static INLINE bool vl_rbsp_more_data(struct vl_rbsp *rbsp) +{ + unsigned bits, value; + + if (vl_vlc_bits_left(rbsp-nal) 8) + return TRUE; + + bits = vl_vlc_valid_bits(rbsp-nal); + value = vl_vlc_peekbits(rbsp-nal, bits); + if (value == 0 || value == (1 (bits - 1))) + return FALSE; + + return TRUE; +} + +#endif /* vl_rbsp_h */ -- 1.8.1.2 ___ mesa-dev mailing
[Mesa-dev] [Bug 69437] Composite Bypass no longer works
https://bugs.freedesktop.org/show_bug.cgi?id=69437 U. Artie Eoff ullysses.a.e...@intel.com changed: What|Removed |Added CC||cwo...@cworth.org --- Comment #4 from U. Artie Eoff ullysses.a.e...@intel.com --- Any progress on getting this backported to the 9.2 branch? -- You are receiving this mail because: You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl/gs: Fix transform feedback of gl_ClipDistance.
On Thu, Oct 24, 2013 at 03:38:49AM -0700, Paul Berry wrote: On 24 October 2013 00:13, Pohjolainen, Topi topi.pohjolai...@intel.com wrote: On Wed, Oct 23, 2013 at 01:08:42PM -0700, Paul Berry wrote: Since gl_ClipDistance is lowered from an array of floats to an array of vec4's during compilation, transform feedback has special logic to keep track of the pre-lowered array size so that attempting to perform transform feedback on gl_ClipDistance produces a result with the correct size. Previously, this special logic always consulted the vertex shader's size for gl_ClipDistance. This patch fixes it so that it uses the geometry shader's size for gl_ClipDistance when a geometry shader is in use. Fixes piglit test spec/glsl-1.50/transform-feedback-type-and-size. --- src/glsl/link_varyings.cpp | 2 +- src/glsl/linker.cpp| 2 ++ src/mesa/main/mtypes.h | 6 ++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp index 4ba6d8a..c503645 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -328,7 +328,7 @@ tfeedback_decl::assign_location(struct gl_context *ctx, const unsigned vector_elements = this-matched_candidate-type-fields.array-vector_elements; unsigned actual_array_size = this-is_clip_distance_mesa ? 
- prog-Vert.ClipDistanceArraySize : + prog-LastClipDistanceArraySize : this-matched_candidate-type-array_size(); if (this-is_subscripted) { diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index b23c31a..d8f655c 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2100,6 +2100,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) validate_vertex_shader_executable(prog, sh); if (!prog-LinkStatus) goto done; + prog-LastClipDistanceArraySize = prog-Vert.ClipDistanceArraySize; _mesa_reference_shader(ctx, prog-_LinkedShaders[MESA_SHADER_VERTEX], sh); @@ -2132,6 +2133,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) validate_geometry_shader_executable(prog, sh); if (!prog-LinkStatus) goto done; + prog-LastClipDistanceArraySize = prog-Geom.ClipDistanceArraySize; _mesa_reference_shader(ctx, prog-_LinkedShaders[MESA_SHADER_GEOMETRY], sh); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 6374e8c..bc7dea4 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2476,6 +2476,12 @@ struct gl_shader_program unsigned NumUserUniformStorage; struct gl_uniform_storage *UniformStorage; + /** +* Size of the gl_ClipDistance array that is output from the last pipeline +* stage before the geometry shader. Can you explain the before the geometry shader part? This is used by the transform feedback mechanism and hence represents the size of the varying output by the geometry shader (if present of course), right? This comment in turn refers to the varying output by vertex shader and _read_ by geometry shader. Oops. I meant to say ...output from the last pipeline stage before the fragment shader. Is that clearer? In other words, for now, it's the geometry shader (if present), and the vertex shader otherwise. But in the future, when we add tessellation shaders, it'll be: - the size of gl_ClipDistance output by the geometry shader, if present. 
- Otherwise, the size of gl_ClipDistance output by the tessellation evaluation shader, if present. - Otherwise, the size of gl_ClipDistance output by the tessellation control shader, if present. - Otherwise, the size of gl_ClipDistance output by the vertex shader. This makes perfect sense, thanks! +*/ + GLuint LastClipDistanceArraySize; + struct gl_uniform_block *UniformBlocks; unsigned NumUniformBlocks; -- 1.8.4.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] glsl: Optimize (not A) or (not B) into not (A and B).
On Thu, Oct 24, 2013 at 1:46 AM, Erik Faye-Lund kusmab...@gmail.com wrote: On Thu, Oct 24, 2013 at 2:19 AM, Matt Turner matts...@gmail.com wrote: A few Serious Sam 3 shaders affected: instructions in affected programs: 4384 - 4344 (-0.91%) --- src/glsl/opt_algebraic.cpp | 12 1 file changed, 12 insertions(+) diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index 37b2f02..3bf0689 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -32,8 +32,11 @@ #include ir_visitor.h #include ir_rvalue_visitor.h #include ir_optimization.h +#include ir_builder.h #include glsl_types.h +using namespace ir_builder; + namespace { /** @@ -436,6 +439,15 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) this-progress = true; return new(mem_ctx) ir_constant(ir-type, data); + } else if (op_expr[0] op_expr[0]-operation == ir_unop_logic_not + op_expr[1] op_expr[1]-operation == ir_unop_logic_not) { + /* De Morgan's Law: + *(not A) or (not B) === not (A and B) + */ + temp = logic_not(logic_and(op_expr[0]-operands[0], +op_expr[1]-operands[0])); + return swizzle_if_required(ir, temp); + this-progress = true; Returning before reporting progress? Ouch! Thanks. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] clover: fix build after a3ed98f7aa85636579a5696bf036ec13e5c9104a
On 10/23/2013 01:33 PM, David Heidelberger wrote: link to similiar bug: https://bugs.freedesktop.org/show_bug.cgi?id=70804 --- src/gallium/state_trackers/clover/core/kernel.cpp | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp index 10eb0e0..3bd08f0 100644 --- a/src/gallium/state_trackers/clover/core/kernel.cpp +++ b/src/gallium/state_trackers/clover/core/kernel.cpp @@ -81,8 +81,8 @@ kernel::launch(command_queue q, 0, exec.samplers.size(), exec.samplers.data()); - q.pipe-set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), - exec.sviews.data()); + q.pipe-set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0, + exec.sviews.size(), exec.sviews.data()); q.pipe-set_compute_resources(q.pipe, 0, exec.resources.size(), exec.resources.data()); q.pipe-set_global_binding(q.pipe, 0, exec.g_buffers.size(), @@ -96,7 +96,8 @@ kernel::launch(command_queue q, q.pipe-set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL); q.pipe-set_compute_resources(q.pipe, 0, exec.resources.size(), NULL); - q.pipe-set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL); + q.pipe-set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0, + exec.sviews.size(), NULL); q.pipe-bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0, exec.samplers.size(), NULL); exec.unbind(); Reviewed-by: Brian Paul bri...@vmware.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965/fs: Drop no-op shifts by 0.
I noticed this in a shader in Unigine Heaven that was spilling. While it doesn't really reduce register pressure, it shaves a few instructions anyway (7955 - 7882). --- src/glsl/opt_algebraic.cpp | 8 1 file changed, 8 insertions(+) diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index 37b2f02..ff06cfc 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -387,6 +387,14 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) } break; + case ir_binop_rshift: + case ir_binop_lshift: + if (is_vec_zero(op_const[0])) + return ir-operands[1]; + else if (is_vec_zero(op_const[1])) + return ir-operands[0]; + break; + case ir_binop_logic_and: /* FINISHME: Also simplify (a a) to (a). */ if (is_vec_one(op_const[0])) { -- 1.8.4.rc3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] clover: fix build after a3ed98f7aa85636579a5696bf036ec13e5c9104a
On Thu, Oct 24, 2013 at 09:14:49AM -0600, Brian Paul wrote: On 10/23/2013 01:33 PM, David Heidelberger wrote: link to similiar bug: https://bugs.freedesktop.org/show_bug.cgi?id=70804 --- src/gallium/state_trackers/clover/core/kernel.cpp | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp index 10eb0e0..3bd08f0 100644 --- a/src/gallium/state_trackers/clover/core/kernel.cpp +++ b/src/gallium/state_trackers/clover/core/kernel.cpp @@ -81,8 +81,8 @@ kernel::launch(command_queue q, 0, exec.samplers.size(), exec.samplers.data()); - q.pipe-set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), - exec.sviews.data()); + q.pipe-set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0, + exec.sviews.size(), exec.sviews.data()); q.pipe-set_compute_resources(q.pipe, 0, exec.resources.size(), exec.resources.data()); q.pipe-set_global_binding(q.pipe, 0, exec.g_buffers.size(), @@ -96,7 +96,8 @@ kernel::launch(command_queue q, q.pipe-set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL); q.pipe-set_compute_resources(q.pipe, 0, exec.resources.size(), NULL); - q.pipe-set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL); + q.pipe-set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0, + exec.sviews.size(), NULL); q.pipe-bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0, exec.samplers.size(), NULL); exec.unbind(); Reviewed-by: Brian Paul bri...@vmware.com I committed this patch yesterday. -Tom ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] clover: Refuse to create context with invalid properties
On Tue, 2013-10-22 at 13:38 -0700, Francisco Jerez wrote: Jan Vesely jan.ves...@rutgers.edu writes: On Mon, 2013-10-21 at 22:20 -0700, Francisco Jerez wrote: Jan Vesely jan.ves...@rutgers.edu writes: the specs say that clCreateContext returns error if platform value specified in properties is not a valid platform The original approach fails if invalid value other than NULL pointer is provided. Fixes piglit cl-api-create-context. Honestly, I don't think this test makes much sense. It's unreasonable to expect that the CL will be able to catch any bad pointer you give it as argument and fail gracefully. The only reliable solution that comes to my mind would be to build a global hash table for each CL object type that keeps track of the valid objects that have been allocated. That seems like a lot of effort with the only purpose of finding out if the user is doing something *very* stupid and very unlikely. Hi, My assumption was that if piglit is testing it, it should be handled, moreover this specific case (platform id), is really simple. I only partly agree with your statement. The specs speak about IDs, it's implementation decision to use pointers as IDs (unless I misread something). Not completely, the official Khronos OpenCL headers define all object IDs as pointers already, and the ICD extension requires them to be pointers with a dispatch table pointer located at offset 0. That means that all implementations using an ICD loader are also going to crash on this test, no matter what Clover does. Neither ocl-icd nor the official Khronos ICD loader bother to keep track of the valid object pointers -- and I don't think there's a good reason for them to, it's a specification requirement that implies a lot of work for almost no benefit. Not doing input checks increases distance from the specs, at least in error paths, do/should we care? I'd rather treat this as a spec bug... 
It's the kind of thing that should really result in unspecified behavior, no real application is likely to rely on it, and most implementations are either ignoring it or not doing it reliably. Wouldn't std::set of all API relevant pointers/ids be enough to implement this? then the check could look like: (set.contains() RTTI), and we don't even need the NULL check. Yeah, that would be a slightly better solution, but it wouldn't be enough. We're likely to move towards an architecture where we're always loaded through the ICD, in that case we would crash at the ICD loader before Clover gets control -- If anywhere the fix probably belongs in the ICD loader and not in the CL implementation itself. Also note that using RTTI as you intend wouldn't work in the general case, because Clover API objects don't share a common polymorphic base class -- and in fact they can't, because API objects need to be standard layout classes in order to have the guarantee that the dispatch table pointer will be located at the expected offset so the ICD loader can find it. So, yes, I think you'd absolutely need to use a separate std::set for each object type, but again I don't think it's worth the effort. That said, we're already doing three forms of object validation: first, the pointers provided by the user are compared against NULL; second, we make sure that the dispatch table pointer is at the correct location in memory; third, if the object is part of a non-trivial class hierarchy, as is the case for events and memory objects, we use RTTI to make sure that the object is of the expected type. I don't think we want or need more validation, it would probably be more useful to drop that test from piglit. Apparently nVidia's libOpenCL fails the test as well. Hm, I should have said that this test makes nVidia crash as well. In that case, it might be better to remove the check entirely and never touch the provided pointer. 
It was the case before e5fc61fa, and unexpected success is imo better than a segfault vector. afaik intel opencl has this behavior. Hm... I'd like us to have a consistent behavior across all object types, and IMHO it's better to die with a segmentation fault when the user passes garbage as one of the pointer arguments than to pretend that everything is fine and a context has been successfully allocated in a non-existing platform. OK. Thanks for detailed explanation, I have two more patches that target error checking in clCreateBuffer, I'll post them soon. Jan Thanks. thanks, Jan -- Jan Vesely jan.ves...@rutgers.edu ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 69437] Composite Bypass no longer works
https://bugs.freedesktop.org/show_bug.cgi?id=69437 --- Comment #5 from Carl Worth cwo...@cworth.org --- (In reply to comment #4) Any progress on getting this backported to the 9.2 branch? I attempted a cherry-pick over to the 9.2 branch, and there were non-trivial conflicts that I didn't feel comfortable attempting to resolve. I believe I pinged Kristian, asking if he could backport the patch. Kristian? -Carl -- You are receiving this mail because: You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/7] vl/h264: split fields into SPS/PPS
On Thu, Oct 24, 2013 at 9:14 AM, Christian König deathsim...@vodafone.de wrote: From: Christian König christian.koe...@amd.com Add alot of missing fields as well. Signed-off-by: Christian König christian.koe...@amd.com --- .../drivers/nouveau/nouveau_vp3_video_bsp.c| 30 src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c | 24 +++--- src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c | 38 +- src/gallium/drivers/nouveau/nv50/nv84_video_vp.c | 8 +- src/gallium/drivers/radeon/radeon_uvd.c| 59 +++ src/gallium/include/pipe/p_video_state.h | 88 -- src/gallium/state_trackers/vdpau/decode.c | 47 ++-- 7 files changed, 169 insertions(+), 125 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c index ba2a917..6d968c1 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c @@ -204,25 +204,25 @@ nouveau_vp3_fill_picparm_h264_bsp(struct nouveau_vp3_decoder *dec, h-unk00 = 1; h-pad1 = h-pad2 = 0; h-unk = 0; - h-log2_max_frame_num_minus4 = d-log2_max_frame_num_minus4; - h-frame_mbs_only_flag = d-frame_mbs_only_flag; - h-direct_8x8_inference_flag = d-direct_8x8_inference_flag; + h-log2_max_frame_num_minus4 = d-pps-sps-log2_max_frame_num_minus4; + h-frame_mbs_only_flag = d-pps-sps-frame_mbs_only_flag; + h-direct_8x8_inference_flag = d-pps-sps-direct_8x8_inference_flag; h-width_mb = mb(dec-base.width); h-height_mb = mb(dec-base.height); - h-entropy_coding_mode_flag = d-entropy_coding_mode_flag; - h-pic_order_present_flag = d-pic_order_present_flag; - h-pic_order_cnt_type = d-pic_order_cnt_type; - h-log2_max_pic_order_cnt_lsb_minus4 = d-log2_max_pic_order_cnt_lsb_minus4; - h-delta_pic_order_always_zero_flag = d-delta_pic_order_always_zero_flag; + h-entropy_coding_mode_flag = d-pps-entropy_coding_mode_flag; + h-pic_order_present_flag = d-pps-bottom_field_pic_order_in_frame_present_flag; + h-pic_order_cnt_type = d-pps-sps-pic_order_cnt_type; + 
h-log2_max_pic_order_cnt_lsb_minus4 = d-pps-sps-log2_max_pic_order_cnt_lsb_minus4; + h-delta_pic_order_always_zero_flag = d-pps-sps-delta_pic_order_always_zero_flag; h-num_ref_idx_l0_active_minus1 = d-num_ref_idx_l0_active_minus1; h-num_ref_idx_l1_active_minus1 = d-num_ref_idx_l1_active_minus1; - h-weighted_pred_flag = d-weighted_pred_flag; - h-weighted_bipred_idc = d-weighted_bipred_idc; - h-pic_init_qp_minus26 = d-pic_init_qp_minus26; - h-deblocking_filter_control_present_flag = d-deblocking_filter_control_present_flag; - h-redundant_pic_cnt_present_flag = d-redundant_pic_cnt_present_flag; - h-transform_8x8_mode_flag = d-transform_8x8_mode_flag; - h-mb_adaptive_frame_field_flag = d-mb_adaptive_frame_field_flag; + h-weighted_pred_flag = d-pps-weighted_pred_flag; + h-weighted_bipred_idc = d-pps-weighted_bipred_idc; + h-pic_init_qp_minus26 = d-pps-pic_init_qp_minus26; + h-deblocking_filter_control_present_flag = d-pps-deblocking_filter_control_present_flag; + h-redundant_pic_cnt_present_flag = d-pps-redundant_pic_cnt_present_flag; + h-transform_8x8_mode_flag = d-pps-transform_8x8_mode_flag; + h-mb_adaptive_frame_field_flag = d-pps-sps-mb_adaptive_frame_field_flag; h-field_pic_flag = d-field_pic_flag; h-bottom_field_flag = d-bottom_field_flag; memset(h-real_pad, 0, sizeof(h-real_pad)); diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c index add998d..a0f5332 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c @@ -340,22 +340,22 @@ nouveau_vp3_fill_picparm_h264_vp(struct nouveau_vp3_decoder *dec, nouveau_vp3_inter_sizes(dec, 1, ring, h-bucket_size, h-inter_ring_data_size); h-u220 = 0; - h-f0 = d-mb_adaptive_frame_field_flag; - h-f1 = d-direct_8x8_inference_flag; - h-weighted_pred_flag = d-weighted_pred_flag; - h-f3 = d-constrained_intra_pred_flag; + h-f0 = d-pps-sps-mb_adaptive_frame_field_flag; + h-f1 = 
d-pps-sps-direct_8x8_inference_flag; + h-weighted_pred_flag = d-pps-weighted_pred_flag; + h-f3 = d-pps-constrained_intra_pred_flag; h-is_reference = d-is_reference; h-interlace = d-field_pic_flag; h-bottom_field_flag = d-bottom_field_flag; h-f7 = 0; // TODO: figure out when set.. - h-log2_max_frame_num_minus4 = d-log2_max_frame_num_minus4; + h-log2_max_frame_num_minus4 = d-pps-sps-log2_max_frame_num_minus4; h-u31_45 = 1; - h-pic_order_cnt_type = d-pic_order_cnt_type; - h-pic_init_qp_minus26 = d-pic_init_qp_minus26; - h-chroma_qp_index_offset = d-chroma_qp_index_offset; - h-second_chroma_qp_index_offset = d-second_chroma_qp_index_offset; -
Re: [Mesa-dev] [PATCH 09/15] i965: Build the driver into a shared mesa_dri_drivers.so .
On 10/23/2013 11:35 AM, Eric Anholt wrote: Chad Versace chad.vers...@linux.intel.com writes: The bits about globalDriverAPI and megadriver_stub.c feel strongly logically disjoint from the rest of the patch. And, they really aren't related to i965. It looks like they should get separated out into a separate patch that immediately precedes this one. The problem is, if I split it out, the code won't even be compiled by anything. I don't think patches of uncompiled code make sense. That's fair. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 09/15] i965: Build the driver into a shared mesa_dri_drivers.so .
On 10/23/2013 11:31 AM, Eric Anholt wrote: Chad Versace chad.vers...@linux.intel.com writes: On 10/11/2013 06:03 PM, Eric Anholt wrote: Previously, we've split things such that mesa core is in libdricore, exposing the whole Mesa core interface in the global namespace, and the i965_dri.so code all links against that. Along with polluting application namespace terribly, it requires extra PLT indirections and prevents LTO. Instead, we can build all of the driver contents into the same .so with just a few symbols exposed to be referenced from the actual driver .so file, allowing LTO and reducing our exposed symbol count massively. FPS improvement on GLB2.7 with INTEL_NO_HW=1: 2.61061% +/- 1.16957% (n=50) (without LTO, just the PLT reductions from this commit) v2: Set a global driverAPI variable so loaders don't have to update to createNewScreen2() (though they may want to for thread safety). Reviewed-by: Matt Turner matts...@gmail.com (v1) --- configure.ac | 29 +++--- src/mesa/drivers/dri/Makefile.am | 54 ++- src/mesa/drivers/dri/common/Makefile.am | 3 ++ src/mesa/drivers/dri/common/dri_util.c| 19 +- src/mesa/drivers/dri/common/dri_util.h| 2 +- src/mesa/drivers/dri/common/megadriver_stub.c | 41 src/mesa/drivers/dri/i965/Makefile.am | 27 +++--- src/mesa/drivers/dri/i965/intel_screen.c | 18 +++-- src/mesa/drivers/dri/i965/intel_screen.h | 2 + 9 files changed, 162 insertions(+), 33 deletions(-) create mode 100644 src/mesa/drivers/dri/common/megadriver_stub.c +# Add a link to allow setting LD_LIBRARY_PATH/LIBGL_DRIVERS_PATH to /lib of the build tree. 
+all-local: mesa_dri_drivers.la + $(MKDIR_P) $(top_builddir)/$(LIB_DIR); + $(AM_V_GEN)ln -f .libs/mesa_dri_drivers.so \ +$(top_builddir)/$(LIB_DIR)/mesa_dri_drivers.so; + $(AM_V_GEN)for i in $(MEGADRIVERS); do \ + ln -f $(top_builddir)/$(LIB_DIR)/mesa_dri_drivers.so \ + $(top_builddir)/$(LIB_DIR)/$$i; \ + done; + +# hardlink each megadriver instance, but don't actually have +# mesa_dri_drivers.so in the set of final installed files. +install-data-hook: + for i in $(MEGADRIVERS); do \ + ln -f $(dridir)/mesa_dri_drivers.so \ + $(dridir)/$$i; \ + done; + $(RM) -f $(dridir)/mesa_dri_drivers.so + $(RM) -f $(dridir)/mesa_dri_drivers.la + +endif This hunk breaks `make install` for me. I'm doing an out-of-tree build and installing to --prefix=/usr. Here's the error: make[7]: Entering directory `/home/chad/exp/makepkg/abs/mesa/src/src/mesa/drivers/dri' for i in i965_dri.so; do \ ln -f /usr/lib/xorg/modules/dri/mesa_dri_drivers.so \ /usr/lib/xorg/modules/dri/$i; \ done; ln: failed to access '/usr/lib/xorg/modules/dri/mesa_dri_drivers.so': No such file or directory It seems that the Makefile never install mesa_dri_drivers.so to $(dridir), but then tries to ln it. I don't speak autotools, so I don't see how to fix this. I can't tell what might have happened from this trimmed log. I did an out of tree build, and mesa_dri_drivers.so gets successfully installed From the install-data-am target that leads to the install-data-hook being done afterwards. I've tried with big and small -j. I'm using Archlinux. That may be the differentiating factor. I'll reproduce today and send you a full log and more details. Maybe we can fix it over irc. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965/fs: Drop no-op shifts by 0.
On Thu, Oct 24, 2013 at 8:53 AM, Eric Anholt e...@anholt.net wrote: I noticed this in a shader in Unigine Heaven that was spilling. While it doesn't really reduce register pressure, it shaves a few instructions anyway (7955 - 7882). --- src/glsl/opt_algebraic.cpp | 8 1 file changed, 8 insertions(+) diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index 37b2f02..ff06cfc 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -387,6 +387,14 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) } break; + case ir_binop_rshift: + case ir_binop_lshift: + if (is_vec_zero(op_const[0])) + return ir-operands[1]; + else if (is_vec_zero(op_const[1])) + return ir-operands[0]; + break; + case ir_binop_logic_and: /* FINISHME: Also simplify (a a) to (a). */ if (is_vec_one(op_const[0])) { -- 1.8.4.rc3 Prefix should be glsl:, but other than that Reviewed-by: Matt Turner matts...@gmail.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC] gallium/auxiliary: u_primconvert for GL_QUADS
From: Rob Clark robcl...@freedesktop.org A utility to turn quads into tri's + index buffer, which can be used by drivers for hardware which does not natively support quads. Signed-off-by: Rob Clark robcl...@freedesktop.org --- NOTE: I was planning to add a few other primitive types, and optimize to not recreate the index buffer each time. But still haven't found time for that, and what is there already is sufficiently useful (ie. needed to run gnome-shell/compiz/xbmc/etc with freedreno), so I just figured I'd send it in it's current form and see what people thought. src/gallium/auxiliary/Makefile.sources| 1 + src/gallium/auxiliary/util/u_primconvert.c| 218 ++ src/gallium/auxiliary/util/u_primconvert.h| 43 + src/gallium/drivers/freedreno/freedreno_context.c | 6 + src/gallium/drivers/freedreno/freedreno_context.h | 2 + src/gallium/drivers/freedreno/freedreno_draw.c| 6 + 6 files changed, 276 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_primconvert.c create mode 100644 src/gallium/auxiliary/util/u_primconvert.h diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index acbcef7..17ca3c1 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -127,6 +127,7 @@ C_SOURCES := \ util/u_network.c \ util/u_math.c \ util/u_mm.c \ + util/u_primconvert.c \ util/u_pstipple.c \ util/u_ringbuffer.c \ util/u_sampler.c \ diff --git a/src/gallium/auxiliary/util/u_primconvert.c b/src/gallium/auxiliary/util/u_primconvert.c new file mode 100644 index 000..575e4da --- /dev/null +++ b/src/gallium/auxiliary/util/u_primconvert.c @@ -0,0 +1,218 @@ +/* -*- mode: C; c-file-style: kr; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark robcl...@freedesktop.org + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, 
including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + *Rob Clark robcl...@freedesktop.org + */ + +/** + * This module converts primitive types supported by GL but not supported by + * GLES. It is intended to help out hw designed for GLES, by remapping + * draw primitives, using an index buffer: + * + * PIPE_PRIM_QUADS - PIPE_PRIM_TRIANGLES + * others.. tbd.. + * + */ + +#include pipe/p_state.h + +#include util/u_primconvert.h +#include util/u_memory.h +#include util/u_inlines.h + +struct primconvert_context { + struct pipe_context *pipe; + struct pipe_index_buffer saved_ib; + + // TODO we could cache/recycle the indexbuf created to translate prims.. 
+}; + + +struct primconvert_context *util_primconvert_create(struct pipe_context *pipe) +{ + struct primconvert_context *pc = CALLOC_STRUCT(primconvert_context); + if (!pc) + return NULL; + pc-pipe = pipe; + return pc; +} + +void util_primconvert_destroy(struct primconvert_context *pc) +{ + util_primconvert_save_index_buffer(pc, NULL); + free(pc); +} + +void util_primconvert_save_index_buffer(struct primconvert_context *pc, + const struct pipe_index_buffer *ib) +{ + if (ib) { + pipe_resource_reference(pc-saved_ib.buffer, ib-buffer); + pc-saved_ib.index_size = ib-index_size; + pc-saved_ib.offset = ib-offset; + pc-saved_ib.user_buffer = ib-user_buffer; + } else { + pipe_resource_reference(pc-saved_ib.buffer, NULL); + } +} + +typedef uint16_t (*unpack_idx_t)(const void *src, unsigned i, struct pipe_draw_info *info); +typedef void (*convert_t)(uint16_t *dst, const void *src, unpack_idx_t unpack, + unsigned start, unsigned end, struct pipe_draw_info *info); + +static uint16_t unpack_idx_1(const void *src, unsigned i, struct pipe_draw_info *info) +{ + uint16_t val = ((uint8_t *)src)[i]; + info-min_index =
Re: [Mesa-dev] [PATCH 3/7] vl/h264: split fields into SPS/PPS
Am 24.10.2013 19:18, schrieb Ilia Mirkin: On Thu, Oct 24, 2013 at 9:14 AM, Christian König deathsim...@vodafone.de wrote: From: Christian König christian.koe...@amd.com Add alot of missing fields as well. Signed-off-by: Christian König christian.koe...@amd.com --- .../drivers/nouveau/nouveau_vp3_video_bsp.c| 30 src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c | 24 +++--- src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c | 38 +- src/gallium/drivers/nouveau/nv50/nv84_video_vp.c | 8 +- src/gallium/drivers/radeon/radeon_uvd.c| 59 +++ src/gallium/include/pipe/p_video_state.h | 88 -- src/gallium/state_trackers/vdpau/decode.c | 47 ++-- 7 files changed, 169 insertions(+), 125 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c index ba2a917..6d968c1 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c @@ -204,25 +204,25 @@ nouveau_vp3_fill_picparm_h264_bsp(struct nouveau_vp3_decoder *dec, h-unk00 = 1; h-pad1 = h-pad2 = 0; h-unk = 0; - h-log2_max_frame_num_minus4 = d-log2_max_frame_num_minus4; - h-frame_mbs_only_flag = d-frame_mbs_only_flag; - h-direct_8x8_inference_flag = d-direct_8x8_inference_flag; + h-log2_max_frame_num_minus4 = d-pps-sps-log2_max_frame_num_minus4; + h-frame_mbs_only_flag = d-pps-sps-frame_mbs_only_flag; + h-direct_8x8_inference_flag = d-pps-sps-direct_8x8_inference_flag; h-width_mb = mb(dec-base.width); h-height_mb = mb(dec-base.height); - h-entropy_coding_mode_flag = d-entropy_coding_mode_flag; - h-pic_order_present_flag = d-pic_order_present_flag; - h-pic_order_cnt_type = d-pic_order_cnt_type; - h-log2_max_pic_order_cnt_lsb_minus4 = d-log2_max_pic_order_cnt_lsb_minus4; - h-delta_pic_order_always_zero_flag = d-delta_pic_order_always_zero_flag; + h-entropy_coding_mode_flag = d-pps-entropy_coding_mode_flag; + h-pic_order_present_flag = d-pps-bottom_field_pic_order_in_frame_present_flag; + 
h-pic_order_cnt_type = d-pps-sps-pic_order_cnt_type; + h-log2_max_pic_order_cnt_lsb_minus4 = d-pps-sps-log2_max_pic_order_cnt_lsb_minus4; + h-delta_pic_order_always_zero_flag = d-pps-sps-delta_pic_order_always_zero_flag; h-num_ref_idx_l0_active_minus1 = d-num_ref_idx_l0_active_minus1; h-num_ref_idx_l1_active_minus1 = d-num_ref_idx_l1_active_minus1; - h-weighted_pred_flag = d-weighted_pred_flag; - h-weighted_bipred_idc = d-weighted_bipred_idc; - h-pic_init_qp_minus26 = d-pic_init_qp_minus26; - h-deblocking_filter_control_present_flag = d-deblocking_filter_control_present_flag; - h-redundant_pic_cnt_present_flag = d-redundant_pic_cnt_present_flag; - h-transform_8x8_mode_flag = d-transform_8x8_mode_flag; - h-mb_adaptive_frame_field_flag = d-mb_adaptive_frame_field_flag; + h-weighted_pred_flag = d-pps-weighted_pred_flag; + h-weighted_bipred_idc = d-pps-weighted_bipred_idc; + h-pic_init_qp_minus26 = d-pps-pic_init_qp_minus26; + h-deblocking_filter_control_present_flag = d-pps-deblocking_filter_control_present_flag; + h-redundant_pic_cnt_present_flag = d-pps-redundant_pic_cnt_present_flag; + h-transform_8x8_mode_flag = d-pps-transform_8x8_mode_flag; + h-mb_adaptive_frame_field_flag = d-pps-sps-mb_adaptive_frame_field_flag; h-field_pic_flag = d-field_pic_flag; h-bottom_field_flag = d-bottom_field_flag; memset(h-real_pad, 0, sizeof(h-real_pad)); diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c index add998d..a0f5332 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c @@ -340,22 +340,22 @@ nouveau_vp3_fill_picparm_h264_vp(struct nouveau_vp3_decoder *dec, nouveau_vp3_inter_sizes(dec, 1, ring, h-bucket_size, h-inter_ring_data_size); h-u220 = 0; - h-f0 = d-mb_adaptive_frame_field_flag; - h-f1 = d-direct_8x8_inference_flag; - h-weighted_pred_flag = d-weighted_pred_flag; - h-f3 = d-constrained_intra_pred_flag; + h-f0 = 
d-pps-sps-mb_adaptive_frame_field_flag; + h-f1 = d-pps-sps-direct_8x8_inference_flag; + h-weighted_pred_flag = d-pps-weighted_pred_flag; + h-f3 = d-pps-constrained_intra_pred_flag; h-is_reference = d-is_reference; h-interlace = d-field_pic_flag; h-bottom_field_flag = d-bottom_field_flag; h-f7 = 0; // TODO: figure out when set.. - h-log2_max_frame_num_minus4 = d-log2_max_frame_num_minus4; + h-log2_max_frame_num_minus4 = d-pps-sps-log2_max_frame_num_minus4; h-u31_45 = 1; - h-pic_order_cnt_type = d-pic_order_cnt_type; - h-pic_init_qp_minus26 = d-pic_init_qp_minus26; - h-chroma_qp_index_offset = d-chroma_qp_index_offset; - h-second_chroma_qp_index_offset = d-second_chroma_qp_index_offset; - h-weighted_bipred_idc =
Re: [Mesa-dev] [PATCH] i965/fs: Drop no-op shifts by 0.
Why is this tagged as i965/fs, when everything seems to happen in the glsl-optimizer? On Thu, Oct 24, 2013 at 5:53 PM, Eric Anholt e...@anholt.net wrote: I noticed this in a shader in Unigine Heaven that was spilling. While it doesn't really reduce register pressure, it shaves a few instructions anyway (7955 -> 7882). --- src/glsl/opt_algebraic.cpp | 8 1 file changed, 8 insertions(+) diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index 37b2f02..ff06cfc 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -387,6 +387,14 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) } break; + case ir_binop_rshift: + case ir_binop_lshift: + if (is_vec_zero(op_const[0])) + return ir->operands[1]; + else if (is_vec_zero(op_const[1])) + return ir->operands[0]; + break; + Maybe update progress inside the conditionals also? But wait a minute. x shifted by 0 is x, so the latter part looks correct. But the first conditional seems to assume that 0 shifted by x is x, but it's really 0, no? Shouldn't both cases return ir->operands[0]? What am I missing? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/7] vl/h264: split fields into SPS/PPS
On Thu, Oct 24, 2013 at 1:55 PM, Christian König deathsim...@vodafone.de wrote: Am 24.10.2013 19:18, schrieb Ilia Mirkin: On Thu, Oct 24, 2013 at 9:14 AM, Christian König deathsim...@vodafone.de wrote: From: Christian König christian.koe...@amd.com Add alot of missing fields as well. Signed-off-by: Christian König christian.koe...@amd.com --- .../drivers/nouveau/nouveau_vp3_video_bsp.c| 30 src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c | 24 +++--- src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c | 38 +- src/gallium/drivers/nouveau/nv50/nv84_video_vp.c | 8 +- src/gallium/drivers/radeon/radeon_uvd.c| 59 +++ src/gallium/include/pipe/p_video_state.h | 88 -- src/gallium/state_trackers/vdpau/decode.c | 47 ++-- 7 files changed, 169 insertions(+), 125 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c index ba2a917..6d968c1 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c @@ -204,25 +204,25 @@ nouveau_vp3_fill_picparm_h264_bsp(struct nouveau_vp3_decoder *dec, h-unk00 = 1; h-pad1 = h-pad2 = 0; h-unk = 0; - h-log2_max_frame_num_minus4 = d-log2_max_frame_num_minus4; - h-frame_mbs_only_flag = d-frame_mbs_only_flag; - h-direct_8x8_inference_flag = d-direct_8x8_inference_flag; + h-log2_max_frame_num_minus4 = d-pps-sps-log2_max_frame_num_minus4; + h-frame_mbs_only_flag = d-pps-sps-frame_mbs_only_flag; + h-direct_8x8_inference_flag = d-pps-sps-direct_8x8_inference_flag; h-width_mb = mb(dec-base.width); h-height_mb = mb(dec-base.height); - h-entropy_coding_mode_flag = d-entropy_coding_mode_flag; - h-pic_order_present_flag = d-pic_order_present_flag; - h-pic_order_cnt_type = d-pic_order_cnt_type; - h-log2_max_pic_order_cnt_lsb_minus4 = d-log2_max_pic_order_cnt_lsb_minus4; - h-delta_pic_order_always_zero_flag = d-delta_pic_order_always_zero_flag; + h-entropy_coding_mode_flag = d-pps-entropy_coding_mode_flag; + 
h-pic_order_present_flag = d-pps-bottom_field_pic_order_in_frame_present_flag; + h-pic_order_cnt_type = d-pps-sps-pic_order_cnt_type; + h-log2_max_pic_order_cnt_lsb_minus4 = d-pps-sps-log2_max_pic_order_cnt_lsb_minus4; + h-delta_pic_order_always_zero_flag = d-pps-sps-delta_pic_order_always_zero_flag; h-num_ref_idx_l0_active_minus1 = d-num_ref_idx_l0_active_minus1; h-num_ref_idx_l1_active_minus1 = d-num_ref_idx_l1_active_minus1; - h-weighted_pred_flag = d-weighted_pred_flag; - h-weighted_bipred_idc = d-weighted_bipred_idc; - h-pic_init_qp_minus26 = d-pic_init_qp_minus26; - h-deblocking_filter_control_present_flag = d-deblocking_filter_control_present_flag; - h-redundant_pic_cnt_present_flag = d-redundant_pic_cnt_present_flag; - h-transform_8x8_mode_flag = d-transform_8x8_mode_flag; - h-mb_adaptive_frame_field_flag = d-mb_adaptive_frame_field_flag; + h-weighted_pred_flag = d-pps-weighted_pred_flag; + h-weighted_bipred_idc = d-pps-weighted_bipred_idc; + h-pic_init_qp_minus26 = d-pps-pic_init_qp_minus26; + h-deblocking_filter_control_present_flag = d-pps-deblocking_filter_control_present_flag; + h-redundant_pic_cnt_present_flag = d-pps-redundant_pic_cnt_present_flag; + h-transform_8x8_mode_flag = d-pps-transform_8x8_mode_flag; + h-mb_adaptive_frame_field_flag = d-pps-sps-mb_adaptive_frame_field_flag; h-field_pic_flag = d-field_pic_flag; h-bottom_field_flag = d-bottom_field_flag; memset(h-real_pad, 0, sizeof(h-real_pad)); diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c index add998d..a0f5332 100644 --- a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c @@ -340,22 +340,22 @@ nouveau_vp3_fill_picparm_h264_vp(struct nouveau_vp3_decoder *dec, nouveau_vp3_inter_sizes(dec, 1, ring, h-bucket_size, h-inter_ring_data_size); h-u220 = 0; - h-f0 = d-mb_adaptive_frame_field_flag; - h-f1 = d-direct_8x8_inference_flag; - h-weighted_pred_flag = 
d-weighted_pred_flag; - h-f3 = d-constrained_intra_pred_flag; + h-f0 = d-pps-sps-mb_adaptive_frame_field_flag; + h-f1 = d-pps-sps-direct_8x8_inference_flag; + h-weighted_pred_flag = d-pps-weighted_pred_flag; + h-f3 = d-pps-constrained_intra_pred_flag; h-is_reference = d-is_reference; h-interlace = d-field_pic_flag; h-bottom_field_flag = d-bottom_field_flag; h-f7 = 0; // TODO: figure out when set.. - h-log2_max_frame_num_minus4 = d-log2_max_frame_num_minus4; + h-log2_max_frame_num_minus4 = d-pps-sps-log2_max_frame_num_minus4; h-u31_45 = 1; - h-pic_order_cnt_type = d-pic_order_cnt_type; - h-pic_init_qp_minus26 =
Re: [Mesa-dev] Continuous Integration
I apologize. I'm in way over my head. I thought this would be easy for some reason but it seems I thought wrong. I'm going to step back and wrap my head around all of this... Yomi ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] gallium: kill off PIPE_FORMAT_Z32_UNORM with extreme prejudice
From: Roland Scheidegger srol...@vmware.com This format, while still supported in OpenGL (but optional) and glx, is just causing major nuisance everywhere and needs special code in some places, because things like 1 depth_bits don't work. It is also the reason why we chose (just like in GL) depth clear values as doubles and not floats. The format however is just a disaster, no hw (as far as I know) supports it (because hw has all float pipelines, and floats don't have enough mantissa bits), and while we CLAIM to support it in software rasterizers, fact is it cannot (and never will) really work right, unless we wouldn't use floats for depth calculations. E.g. depth offsets can't work right (small depth bias will disappear with primitive depths 1/256 due to the float calcs without enough mantissa bits), and it is generally useless (since the calculations are all float, use a float buffer if you need something better than z24 unorm). --- src/gallium/auxiliary/util/u_format.csv|1 - src/gallium/auxiliary/util/u_format.h |4 +- src/gallium/auxiliary/util/u_format_tests.c|3 - src/gallium/auxiliary/util/u_format_zs.c | 69 src/gallium/auxiliary/util/u_pack_color.h |6 -- src/gallium/auxiliary/util/u_tile.c| 56 src/gallium/drivers/ilo/ilo_format.c |2 +- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 17 - src/gallium/drivers/svga/svga_format.c |3 - src/gallium/drivers/svga/svga_pipe_misc.c |4 -- src/gallium/include/pipe/p_format.h|2 +- src/gallium/state_trackers/dri/common/dri_screen.c | 18 ++--- src/gallium/state_trackers/egl/common/egl_g3d.c|1 - src/gallium/state_trackers/glx/xlib/xm_api.c |3 - src/gallium/state_trackers/wgl/stw_pixelformat.c |1 - src/gallium/state_trackers/xa/xa_tracker.c |3 - src/gallium/state_trackers/xorg/xorg_dri2.c|3 - src/mesa/state_tracker/st_cb_fbo.c |3 - src/mesa/state_tracker/st_format.c |6 +- 19 files changed, 12 insertions(+), 193 deletions(-) diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv 
index 8d04b00..662cc59 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -126,7 +126,6 @@ PIPE_FORMAT_R5SG5SB6U_NORM, plain, 1, 1, sn5 , sn5 , un6 , , xyz1, r # Depth-stencil formats PIPE_FORMAT_S8_UINT , plain, 1, 1, up8 , , , , _x__, zs PIPE_FORMAT_Z16_UNORM , plain, 1, 1, un16, , , , x___, zs -PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___, zs PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs PIPE_FORMAT_Z24_UNORM_S8_UINT , plain, 1, 1, un24, up8 , , , xy__, zs PIPE_FORMAT_S8_UINT_Z24_UNORM , plain, 1, 1, up8 , un24, , , yx__, zs diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 84f16d5..2101293 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -289,7 +289,7 @@ struct util_format_description unsigned i, unsigned j); /** -* Unpack pixels to Z32_UNORM. +* Unpack pixels to 32bit unorm. * Note: strides are in bytes. * * Only defined for depth formats. @@ -300,7 +300,7 @@ struct util_format_description unsigned width, unsigned height); /** -* Pack pixels from Z32_FLOAT. +* Pack pixels from 32bit unorm. * Note: strides are in bytes. * * Only defined for depth formats. 
diff --git a/src/gallium/auxiliary/util/u_format_tests.c b/src/gallium/auxiliary/util/u_format_tests.c index 64224cd..0e07dd1 100644 --- a/src/gallium/auxiliary/util/u_format_tests.c +++ b/src/gallium/auxiliary/util/u_format_tests.c @@ -347,9 +347,6 @@ util_format_test_cases[] = {PIPE_FORMAT_Z16_UNORM, PACKED_1x16(0x), PACKED_1x16(0x), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, {PIPE_FORMAT_Z16_UNORM, PACKED_1x16(0x), PACKED_1x16(0x), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, - {PIPE_FORMAT_Z32_UNORM, PACKED_1x32(0x), PACKED_1x32(0x), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, - {PIPE_FORMAT_Z32_UNORM, PACKED_1x32(0x), PACKED_1x32(0x), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, - {PIPE_FORMAT_Z32_FLOAT, PACKED_1x32(0x), PACKED_1x32(0x), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, {PIPE_FORMAT_Z32_FLOAT, PACKED_1x32(0x), PACKED_1x32(0x3f80), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, diff --git a/src/gallium/auxiliary/util/u_format_zs.c b/src/gallium/auxiliary/util/u_format_zs.c index ed45c52..f1a7b4c 100644 --- a/src/gallium/auxiliary/util/u_format_zs.c +++ b/src/gallium/auxiliary/util/u_format_zs.c
Re: [Mesa-dev] [PATCH 00/18] Implement GLX_MESA_query_renderer
Dave and Marek, Do either of you guys plan to implement support for this extension? The value to developers is obviously increased if more drivers support the extension. This extension was born from feedback that I received from people at FOSDEM and from various game developers at Game Developer Conference and elsewhere. I'd like to land this extension, and I haven't received any review. I know you guys are both pretty busy, so I don't expect detailed reviews. I would really appreciate a quick skim of the extension spec (patch 15) and an Acked-by or two. Thanks. On 10/11/2013 03:10 PM, Ian Romanick wrote: This is the completion of some work that I started back in February after FOSDEM. *blush* http://www.paranormal-entertainment.com/idr/blog/posts/2013-02-07T22:42:53Z-FOSDEM2013_Presentation/ Basically, this adds a method for applications to query various aspects of the GL implementation *before* creating a context. This has a number of advantages, but the two big ones are: - Depending on what they learn, the app might just use software rendering because the implementation is black listed. - Depending on what they learn, the app might create a different kind of context (core profile vs. compatibility profile vs. ES profile). This implementation should be complete. The enums /may/ change (and it just occurred to me that I didn't fix the commit message in patch 10), but I think they should be fine. They're allocated from Intel's range, and I'm still double checking that nobody else is planning to use them. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: Update MESA_INFO to eliminate error
If a user set MESA_INFO and the OpenGL application uses a 3.0 or later context then the MESA_INFO debug output will have an error when it queries for extensions using the deprecated enum GL_EXTENSIONS. Passing context argument allows code to return extension list directly regardless of profile. Commit title updated as recommended by Kenneth Graunke. --- src/mesa/main/context.c | 2 +- src/mesa/main/debug.c | 10 +++--- src/mesa/main/debug.h | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 0d1f71c..8218153 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1522,7 +1522,7 @@ _mesa_make_current( struct gl_context *newCtx, * information. */ if (_mesa_getenv(MESA_INFO)) { - _mesa_print_info(); + _mesa_print_info(newCtx); } newCtx-FirstTimeCurrent = GL_FALSE; diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c index 9434c1e..99b2147 100644 --- a/src/mesa/main/debug.c +++ b/src/mesa/main/debug.c @@ -103,7 +103,7 @@ _mesa_print_state( const char *msg, GLuint state ) /** * Print information about this Mesa version and build options. */ -void _mesa_print_info( void ) +void _mesa_print_info( struct gl_context *ctx ) { _mesa_debug(NULL, Mesa GL_VERSION = %s\n, (char *) _mesa_GetString(GL_VERSION)); @@ -111,8 +111,12 @@ void _mesa_print_info( void ) (char *) _mesa_GetString(GL_RENDERER)); _mesa_debug(NULL, Mesa GL_VENDOR = %s\n, (char *) _mesa_GetString(GL_VENDOR)); - _mesa_debug(NULL, Mesa GL_EXTENSIONS = %s\n, - (char *) _mesa_GetString(GL_EXTENSIONS)); + + /* use ctx as GL_EXTENSIONS will not work on 3.0 or higher +* core contexts. 
+*/ + _mesa_debug(NULL, Mesa GL_EXTENSIONS = %s\n, ctx-Extensions.String); + #if defined(THREADS) _mesa_debug(NULL, Mesa thread-safe: YES\n); #else diff --git a/src/mesa/main/debug.h b/src/mesa/main/debug.h index 8414c5e..902f595 100644 --- a/src/mesa/main/debug.h +++ b/src/mesa/main/debug.h @@ -43,7 +43,7 @@ struct gl_texture_image; extern void _mesa_print_enable_flags( const char *msg, GLuint flags ); extern void _mesa_print_state( const char *msg, GLuint state ); -extern void _mesa_print_info( void ); +extern void _mesa_print_info( struct gl_context *ctx ); extern void _mesa_init_debug( struct gl_context *ctx ); extern void -- 1.8.1.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC] gallium/auxiliary: u_primconvert for GL_QUADS
On 10/24/2013 11:44 AM, Rob Clark wrote: From: Rob Clark robcl...@freedesktop.org A utility to turn quads into tri's + index buffer, which can be used by drivers for hardware which does not natively support quads. Signed-off-by: Rob Clark robcl...@freedesktop.org We already have some code for this. Check out the u_indices.c code. The svga driver uses this to convert QUADS into indexed triangles, etc. Keith wrote the original code so I don't know the details but I at least added some comments a while back. -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 69437] Composite Bypass no longer works
https://bugs.freedesktop.org/show_bug.cgi?id=69437 --- Comment #6 from Kristian Høgsberg k...@bitplanet.net --- (In reply to comment #5) (In reply to comment #4) Any progress on getting this backported to the 9.2 branch? I attempted a cherry-pick over to the 9.2 branch, and there were non-trivial conflicts that I didn't feel comfortable attempting to resolve. I believe I pinged Kristian, asking if he could backport the patch. Kristian? Yup, wasn't a trivial backport so it took a little longer and we missed the recent release. I'll get a fix ready for the next one. -- You are receiving this mail because: You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC] gallium/auxiliary: u_primconvert for GL_QUADS
On Thu, Oct 24, 2013 at 3:26 PM, Brian Paul bri...@vmware.com wrote: On 10/24/2013 11:44 AM, Rob Clark wrote: From: Rob Clark robcl...@freedesktop.org A utility to turn quads into tri's + index buffer, which can be used by drivers for hardware which does not natively support quads. Signed-off-by: Rob Clark robcl...@freedesktop.org We already have some code for this. Check out the u_indices.c code. The svga driver uses this to convert QUADS into indexed triangles, etc. Keith wrote the original code so I don't know the details but I at least added some comments a while back. ahh.. from a quick look, I think this should work.. just need to figure out how to use it ;-) BR, -R -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] RFC clover: calculate maximum workgroup size based on device
Aaron Watry awa...@gmail.com writes: The maximum workgroup size for a given kernel is based on the capabilities of the device that it's being run on. Previously, we were just returning the maximum value of a size_t which is obviously wrong. This patch uses the device's capabilities, but doesn't take into account any resource usage which would decrease the work group size further. Suggestions/comments/fixes welcome. --- src/gallium/state_trackers/clover/api/kernel.cpp | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp b/src/gallium/state_trackers/clover/api/kernel.cpp index d6129e6..90bb213 100644 --- a/src/gallium/state_trackers/clover/api/kernel.cpp +++ b/src/gallium/state_trackers/clover/api/kernel.cpp @@ -156,7 +156,11 @@ clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev, switch (param) { case CL_KERNEL_WORK_GROUP_SIZE: - buf.as_scalarsize_t() = kern.max_block_size(); + //FIXME: This should be maximum that the requested device can support for + // this kernel, not the maximum value of a size_t... and just using + // dev-max_threads_per_block doesn't take into account the kernel's + // resource usage... + buf.as_scalarsize_t() = pdev-max_threads_per_block(); break; This doesn't work when pdev is NULL. And I think we could drop the FIXME comment and the kernel::max_block_size() method altogether. It seems unlikely to me that any hardware back-end will ever need the per-kernel setting, and the Gallium API doesn't support it. I've pushed a different fix [1] based on your patch that takes care of that. Thank you. 
[1] http://cgit.freedesktop.org/mesa/mesa/commit/?id=7463abd37d65abd4d87abe314e0629c853dd9bca case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev pgpXG5XNHwHPJ.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] clover: mark gcc 4.7.0 as broken
David Heidelberger david.heidelber...@ixit.cz writes: From e004b63b2896ac2c8951cfda49d4a54d72b16449 Mon Sep 17 00:00:00 2001 From: David Heidelberger david.heidelber...@ixit.cz Date: Wed, 23 Oct 2013 23:35:42 +0200 Subject: [PATCH] clover: mark gcc 4.7.0 as broken --- configure.ac | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index dc15ad4..ea6f2f3 100644 --- a/configure.ac +++ b/configure.ac @@ -109,6 +109,7 @@ if test x$GCC = xyes -a x$acv_mesa_CLANG = xno; then if test $? -eq 0; then GCC_VERSION_MAJOR=`echo $GCC_VERSION | cut -d. -f1` GCC_VERSION_MINOR=`echo $GCC_VERSION | cut -d. -f2` +GCC_VERSION_MAINTENANCE=`echo $GCC_VERSION | cut -d. -f3` fi if test $GCC_VERSION_MAJOR -lt 3 -o $GCC_VERSION_MAJOR -eq 3 -a $GCC_VERSION_MINOR -lt 3 ; then @@ -1371,8 +1372,9 @@ if test x$enable_opencl = xyes; then AC_MSG_ERROR([cannot enable OpenCL without Gallium]) fi -if test $GCC_VERSION_MAJOR -lt 4 -o $GCC_VERSION_MAJOR -eq 4 -a $GCC_VERSION_MINOR -lt 7; then -AC_MSG_ERROR([gcc = 4.7 is required to build clover]) +if test $GCC_VERSION_MAJOR -lt 4 -o \ +$GCC_VERSION_MAJOR -eq 4 -a $GCC_VERSION_MINOR -lt 7 -a $GCC_VERSION_MAINTENANCE -lt 1; then +AC_MSG_ERROR([gcc = 4.7.1 is required to build clover]) fi I don't think this is doing what you intended, it's going to accept e.g. gcc-4.6.1 again. And I don't think 4.7.1 is going to be good enough, it seems like the feature we rely on was fixed in 4.7.3. Thank you. if test x$have_libclc = xno; then -- 1.8.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev pgp0kZScm9Ibv.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/8] i965: Implement FS backend for ARB_sample_shading
On Sat, Oct 19, 2013 at 3:05 PM, Paul Berry stereotype...@gmail.com wrote: On 14 October 2013 10:12, Anuj Phogat anuj.pho...@gmail.com wrote: Implement the FS backend for new builtins added by the extension: in vec2 gl_SamplePosition in int gl_SampleID in int gl_NumSamples out int gl_SampleMask[] There is a lot going on in this one patch, and it's getting hard to follow all the patch review that's going on. If you wind up re-spinning this patch series, can you please break it into four separate patches, one to add support for each builtin? Yes, I'll split it in to four patches. Signed-off-by: Anuj Phogat anuj.pho...@gmail.com --- src/mesa/drivers/dri/i965/brw_fs.cpp | 109 +++ src/mesa/drivers/dri/i965/brw_fs.h | 4 + src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 23 ++ src/mesa/drivers/dri/i965/brw_wm.h | 1 + 4 files changed, 137 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index e5d6e4b..e4f7745 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1115,6 +1115,109 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) return reg; } +void +fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos) +{ + int num_samples = ctx-DrawBuffer-Visual.samples; This isn't safe. When compilation depends on a piece of GL state, you need to include the state in brw_wm_prog_key. Otherwise the program won't get recompiled when the value changes. To avoid unnecessary recompiles, I'd recommend adding a boolean to brw_wm_prog_key which is: - true if ctx-Multisample.Enabled num_samples != 0 (shader accesses gl_SamplePosition) - false otherwise yes, this make sense. + assert(num_samples = 0 num_samples = 8); + + /* From arb_sample_shading specification: +* When rendering to a non-multisample buffer, or if multisample +* rasterization is disabled, gl_SamplePosition will always be +* (0.5, 0.5). 
+*/ + if (!ctx-Multisample.Enabled || num_samples == 0) { + emit(BRW_OPCODE_MOV, dst, fs_reg(0.5f)); It looks like you're using the old, more verbose style of emitting instructions. Can we convert this (and the other instructions in this patch) to the more compact style: emit(MOV(dst, fs_reg(0.5f))); ok. + } + else { + /* For num_samples = {4, 8} */ + emit(BRW_OPCODE_MOV, dst, int_sample_pos); + emit(BRW_OPCODE_MUL, dst, dst, fs_reg(1 / 16.0f)); Like Ken, I was confused as to why we needed a separate MOV before the MUL. The assertion Ken recommended would have been a useful clue, but I'd prefer to just have explicit comments explaining why we need the MOV. How about this: /* Convert int_sample_pos to floating point */ emit(BRW_OPCODE_MOV, dst, int_sample_pos); /* Scale to the range [0, 1] */ emit(BRW_OPCODE_MUL, dst, dst, fs_reg(1 / 16.0f)); + } +} + +fs_reg * +fs_visitor::emit_samplepos_interpolation(ir_variable *ir) +{ + assert(brw-gen = 6); + + this-current_annotation = compute sample position; + fs_reg *reg = new(this-mem_ctx) fs_reg(this, ir-type); Since this code assigns to two consecutive registers, it relies on the fact that ir-type is vec2. Just to make that explicit, can we add: assert(ir-type == glsl_type::vec2_type); + fs_reg pos = *reg; + fs_reg int_sample_x = fs_reg(this, glsl_type::int_type); + fs_reg int_sample_y = fs_reg(this, glsl_type::int_type); + + /* WM will be run in MSDISPMODE_PERSAMPLE. So, only SIMD8 mode will be +* enabled. The X, Y sample positions come in as bytes in thread payload. +* Sample IDs and sample positions remain same for all four slots in a +* subspan. So, read the positions using vstride=2, width=4, hstride=0. I have similar concerns to Ken about why MSDISPMODE_PERSAMPLE implies that only SIMD8 mode will be enabled. I'm assuming the two of you have adequately resolved that. I was facing few problems with enabling 'SIMD16 only' dispatch. So, we agreed to enable 'SIMD8 only' dispatch in this series. 
I'll do SIMD16 in a followup patch. I'll update this comment with correct information. +*/ + emit(BRW_OPCODE_AND, int_sample_x, +fs_reg(stride(retype(brw_vec1_grf(c-sample_pos_reg, 0), + BRW_REGISTER_TYPE_D), 2, 4, 0)), +fs_reg(brw_imm_d(0xff))); If I understand correctly, this is creating the instruction AND(8) int_sample_x1:d sample_pos_reg2;4,0:d 0x00ff:d I think this works, but it would be more straightforward to do this, which just selects the X coordinate bytes from the sample_pos_reg register: MOV(8) int_sample_x1:d sample_pos_reg16;8,2:b That would have the advantage that it doesn't rely on the fact that sample IDs and sample positions are the same for all four slots in a subspan. (Note: there are some weird restrictions on
Re: [Mesa-dev] [PATCH] i965/fs: Drop no-op shifts by 0.
On Thu, Oct 24, 2013 at 11:12 AM, Erik Faye-Lund kusmab...@gmail.com wrote: But wait a minute. x shifted by 0 is x, so the latter part looks correct. But the first conditional seems to assume that 0 shifted by x is x, but it's really 0, no? Shouldn't both cases return ir->operands[0]? What am I missing? I think Eric and I should now be banned from reviewing each other's patches to opt_algebraic. :) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 69437] Composite Bypass no longer works
https://bugs.freedesktop.org/show_bug.cgi?id=69437 --- Comment #7 from Kristian Høgsberg k...@bitplanet.net --- Just pushed the backported fix to 9.2: commit 7ab2b8c4c4607817c91946dcba943b29f1bd1895 Author: Kristian Høgsberg k...@bitplanet.net Date: Thu Sep 26 12:25:11 2013 -0700 wayland: Don't rely on static variable for identifying wl_drm buffers Now that libEGL has been fixed to not leak all kinds of symbols, gbm links to its own copy of the libwayland-drm.a helper library. That means we can't rely on comparing the addresses of a static vtable symbol in that library to determine if a wl_buffer is a wl_drm_buffer. Instead, we move the vtable into the wl_drm struct and use that for comparing. Backported from 360a141f24a9d00891665b7fedb77ffb116944ca. https://bugs.freedesktop.org/show_bug.cgi?id=69437 Cc: 9.2 mesa-sta...@lists.freedesktop.org -- You are receiving this mail because: You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallivm: implement fully accurate corner filtering for seamless cube maps
On 10/23/2013 11:15 AM, srol...@vmware.com wrote: From: Roland Scheidegger srol...@vmware.com d3d10 requires that cube corners are filtered with accurate weights (that is, the weight of the non-existing corner texel should be evenly distributed to the other 3 texels). OpenGL does not require this (but recommends it). This requires us to use different filtering code, since we need per-texel weights which our 2d lerp doesn't (and can't) do. And of course the (now per element) weights need to be adjusted too for it to work. Invoke the new filtering code whenever there's an edge to keep things simpler, as it will work for edges too not just corners but of course it's only needed with corners. More ugly code for not much gain but at least a hacked up cubemap demo shows very nice corners now... Not sure yet if and how this should be configurable... v2: incorporate feedback from Jose, only use special corner filtering code when there's a corner not when there's only an edge (as corner filtering code is slower, though a perf difference was only measurable when always forcing edge code). Plus some minor style fixes. --- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 164 +++-- 1 file changed, 151 insertions(+), 13 deletions(-) Looks OK to me. Reviewed-by: Brian Paul bri...@vmware.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium: kill off PIPE_FORMAT_Z32_UNORM with extreme prejudice
On 10/24/2013 12:48 PM, srol...@vmware.com wrote: From: Roland Scheidegger srol...@vmware.com This format, while still supported in OpenGL (but optional) and glx, is just causing major nuisance everywhere and needs special code in some places, because things like 1 << depth_bits don't work. It is also the reason why we chose (just like in GL) depth clear values as doubles and not floats. The format however is just a disaster, no hw (as far as I know) supports it (because hw has all float pipelines, and floats don't have enough mantissa bits), and while we CLAIM to support it in software rasterizers, fact is it cannot (and never will) really work right, unless we wouldn't use floats for depth calculations. E.g. depth offsets can't work right (small depth bias will disappear with primitive depths 1/256 due to the float calcs without enough mantissa bits), and it is generally useless (since the calculations are all float, use a float buffer if you need something better than z24 unorm). Reviewed-by: Brian Paul bri...@vmware.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 15/18] docs: Import extension spec for GLX_MESA_query_renderer
On Sat, Oct 12, 2013 at 12:10 AM, Ian Romanick i...@freedesktop.org wrote: From: Ian Romanick ian.d.roman...@intel.com The enumerated values are currently allocated from Intel's range. Signed-off-by: Ian Romanick ian.d.roman...@intel.com --- docs/MESA_query_renderer.spec | 381 ++ 1 file changed, 381 insertions(+) create mode 100644 docs/MESA_query_renderer.spec diff --git a/docs/MESA_query_renderer.spec b/docs/MESA_query_renderer.spec new file mode 100644 index 000..6d683ee --- /dev/null +++ b/docs/MESA_query_renderer.spec @@ -0,0 +1,381 @@ +Name + +MESA_query_renderer + +Name Strings + +GLX_MESA_query_renderer + +Contact + +Ian Romanick ian.d.roman...@intel.com + +IP Status + +No known IP claims. + +Status + +Incomplete. DO NOT SHIP. + +Version + +Version 5, 14-February-2013 + +Number + +TBD. + +Dependencies + +GLX 1.4 is required. + +GLX_ARB_create_context and GLX_ARB_create_context_profile are required. + +This extension interacts with GLX_EXT_create_context_es2_profile and +GLX_EXT_create_context_es_profile. + +Overview + +In many situations, applications want to detect characteristics of a +rendering device before creating a context for that device. Information +gathered at this stage may guide choices the application makes about +color depth, number of samples per-pixel, texture quality, and so on. +In addition, versions of supported APIs and implementation API +preference may also guide start-up decisions made by the application. +For example, one implementation may prefer vertex data be supplied using +methods only available in a compatibility profile, but another +implementation may only support the desired version in a core profile. + +There are also cases where more than one renderer may be available per +display. For example, there is typically a hardware implementation and +a software based implementation. There are cases where an application +may want to pick one over the other. 
One such situation is when the +software implementation supports more features than the hardware +implementation. Another situation is when a particular version of the +hardware implementation is blacklisted due to known bugs. + +This extension provides a mechanism for the application to query all of +the available renderers for a particular display and screen. In +addition, this extension provides a mechanism for applications to create +contexts with respect to a specific renderer. + +New Procedures and Functions + +Bool glXQueryRendererIntegerMESA(Display *dpy, int screen, + int renderer, int attribute, + unsigned int *value); +Bool glXQueryCurrentRendererIntegerMESA(int attribute, unsigned int *value); + +const char *glXQueryRendererStringMESA(Display *dpy, int screen, + int renderer, int attribute); + +const char *glXQueryCurrentRendererStringMESA(int attribute); + +New Tokens + +Accepted as an attribute in glXQueryRendererIntegerMESA: Also accepted in glXQueryCurrentRendererIntegerMESA? + +GLX_RENDERER_VENDOR_ID_MESA 0x8183 +GLX_RENDERER_DEVICE_ID_MESA 0x8184 +GLX_RENDERER_VERSION_MESA0x8185 +GLX_RENDERER_ACCELERATED_MESA0x8186 +GLX_RENDERER_VIDEO_MEMORY_MESA 0x8187 +GLX_RENDERER_UNIFIED_MEMORY_ARCHITECTURE_MESA0x8188 +GLX_RENDERER_PREFERRED_PROFILE_MESA 0x8189 +GLX_RENDERER_OPENGL_CORE_PROFILE_VERSION_MESA0x818A +GLX_RENDERER_OPENGL_COMPATIBILITY_PROFILE_VERSION_MESA0x818B +GLX_RENDERER_OPENGL_ES_PROFILE_VERSION_MESA 0x818C +GLX_RENDERER_OPENGL_ES2_PROFILE_VERSION_MESA 0x818D + +Accepted as an attribute in glXQueryRendererStringMESA: Also accepted in glXQueryCurrentRendererStringMESA? + +GLX_RENDERER_VENDOR_ID_MESA +GLX_RENDERER_DEVICE_ID_MESA + +Accepted as an attribute name in *attrib_list in +glXCreateContextAttribsARB: + +GLX_RENDERER_ID_MESA 0x818E + +Additions to the OpenGL / WGL Specifications + +None. This specification is written for GLX. 
+ +Additions to the GLX 1.4 Specification + +[Add the following to Section X.Y.Z of the GLX Specification] + +To obtain information about the available renderers for a particular +display and screen, + +Bool glXQueryRendererIntegerMESA(Display *dpy, int screen, int renderer, + int attribute, unsigned int *value); + +can be used. The value for attribute will be returned in
Re: [Mesa-dev] [PATCH] gallium: kill off PIPE_FORMAT_Z32_UNORM with extreme prejudice
Sounds good to me. Marek On Thu, Oct 24, 2013 at 8:48 PM, srol...@vmware.com wrote: From: Roland Scheidegger srol...@vmware.com This format, while still supported in OpenGL (but optional) and glx, is just causing major nuisance everywhere and needs special code in some places, because things like 1 << depth_bits don't work. It is also the reason why we chose (just like in GL) depth clear values as doubles and not floats. The format however is just a disaster, no hw (as far as I know) supports it (because hw has all float pipelines, and floats don't have enough mantissa bits), and while we CLAIM to support it in software rasterizers, fact is it cannot (and never will) really work right, unless we wouldn't use floats for depth calculations. E.g. depth offsets can't work right (small depth bias will disappear with primitive depths 1/256 due to the float calcs without enough mantissa bits), and it is generally useless (since the calculations are all float, use a float buffer if you need something better than z24 unorm). 
--- src/gallium/auxiliary/util/u_format.csv|1 - src/gallium/auxiliary/util/u_format.h |4 +- src/gallium/auxiliary/util/u_format_tests.c|3 - src/gallium/auxiliary/util/u_format_zs.c | 69 src/gallium/auxiliary/util/u_pack_color.h |6 -- src/gallium/auxiliary/util/u_tile.c| 56 src/gallium/drivers/ilo/ilo_format.c |2 +- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 17 - src/gallium/drivers/svga/svga_format.c |3 - src/gallium/drivers/svga/svga_pipe_misc.c |4 -- src/gallium/include/pipe/p_format.h|2 +- src/gallium/state_trackers/dri/common/dri_screen.c | 18 ++--- src/gallium/state_trackers/egl/common/egl_g3d.c|1 - src/gallium/state_trackers/glx/xlib/xm_api.c |3 - src/gallium/state_trackers/wgl/stw_pixelformat.c |1 - src/gallium/state_trackers/xa/xa_tracker.c |3 - src/gallium/state_trackers/xorg/xorg_dri2.c|3 - src/mesa/state_tracker/st_cb_fbo.c |3 - src/mesa/state_tracker/st_format.c |6 +- 19 files changed, 12 insertions(+), 193 deletions(-) diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 8d04b00..662cc59 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -126,7 +126,6 @@ PIPE_FORMAT_R5SG5SB6U_NORM, plain, 1, 1, sn5 , sn5 , un6 , , xyz1, r # Depth-stencil formats PIPE_FORMAT_S8_UINT , plain, 1, 1, up8 , , , , _x__, zs PIPE_FORMAT_Z16_UNORM , plain, 1, 1, un16, , , , x___, zs -PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___, zs PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs PIPE_FORMAT_Z24_UNORM_S8_UINT , plain, 1, 1, un24, up8 , , , xy__, zs PIPE_FORMAT_S8_UINT_Z24_UNORM , plain, 1, 1, up8 , un24, , , yx__, zs diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 84f16d5..2101293 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -289,7 +289,7 @@ struct util_format_description unsigned i, unsigned j); /** -* Unpack pixels to Z32_UNORM. 
+* Unpack pixels to 32bit unorm. * Note: strides are in bytes. * * Only defined for depth formats. @@ -300,7 +300,7 @@ struct util_format_description unsigned width, unsigned height); /** -* Pack pixels from Z32_FLOAT. +* Pack pixels from 32bit unorm. * Note: strides are in bytes. * * Only defined for depth formats. diff --git a/src/gallium/auxiliary/util/u_format_tests.c b/src/gallium/auxiliary/util/u_format_tests.c index 64224cd..0e07dd1 100644 --- a/src/gallium/auxiliary/util/u_format_tests.c +++ b/src/gallium/auxiliary/util/u_format_tests.c @@ -347,9 +347,6 @@ util_format_test_cases[] = {PIPE_FORMAT_Z16_UNORM, PACKED_1x16(0x), PACKED_1x16(0x), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, {PIPE_FORMAT_Z16_UNORM, PACKED_1x16(0x), PACKED_1x16(0x), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, - {PIPE_FORMAT_Z32_UNORM, PACKED_1x32(0x), PACKED_1x32(0x), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, - {PIPE_FORMAT_Z32_UNORM, PACKED_1x32(0x), PACKED_1x32(0x), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, - {PIPE_FORMAT_Z32_FLOAT, PACKED_1x32(0x), PACKED_1x32(0x), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, {PIPE_FORMAT_Z32_FLOAT, PACKED_1x32(0x), PACKED_1x32(0x3f80), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, diff --git
[Mesa-dev] [PATCH] graw: add a test rendering a huge triangle
Used to test rasterization, because we often breakdown on subdivision of triangles with long edges. Signed-off-by: Zack Rusin za...@vmware.com --- src/gallium/tests/graw/SConscript | 1 + src/gallium/tests/graw/tri-large.c | 173 + 2 files changed, 174 insertions(+) create mode 100644 src/gallium/tests/graw/tri-large.c diff --git a/src/gallium/tests/graw/SConscript b/src/gallium/tests/graw/SConscript index 8740ff3..8723807 100644 --- a/src/gallium/tests/graw/SConscript +++ b/src/gallium/tests/graw/SConscript @@ -29,6 +29,7 @@ progs = [ 'tex-srgb', 'tex-swizzle', 'tri', +'tri-large', 'tri-gs', 'tri-instanced', 'vs-test', diff --git a/src/gallium/tests/graw/tri-large.c b/src/gallium/tests/graw/tri-large.c new file mode 100644 index 000..3fbbfb3 --- /dev/null +++ b/src/gallium/tests/graw/tri-large.c @@ -0,0 +1,173 @@ +/* Display a cleared blue window. This demo has no dependencies on + * any utility code, just the graw interface and gallium. + */ + +#include graw_util.h +#include util/u_debug.h + +#include stdio.h + +static struct graw_info info; + +static const int WIDTH = 4*2048; +static const int HEIGHT = 4*2048; + + +struct vertex { + float position[4]; + float color[4]; +}; + +static boolean FlatShade = FALSE; + + +static struct vertex vertices[3] = +{ + { + { -1.0f, -1.0f, 0.0f, 1.0f }, + { 1.0f, 0.0f, 0.0f, 1.0f } + }, + { + { -1.0f, 1.0f, 0.0f, 1.0f }, + { 0.0f, 1.0f, 0.0f, 1.0f } + }, + { + { 1.0f, 1.0f, 0.0f, 1.0f }, + { 0.0f, 0.0f, 1.0f, 1.0f } + } +}; + + +static void set_vertices( void ) +{ + struct pipe_vertex_element ve[2]; + struct pipe_vertex_buffer vbuf; + void *handle; + + memset(ve, 0, sizeof ve); + + ve[0].src_offset = Offset(struct vertex, position); + ve[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[1].src_offset = Offset(struct vertex, color); + ve[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + + handle = info.ctx-create_vertex_elements_state(info.ctx, 2, ve); + info.ctx-bind_vertex_elements_state(info.ctx, handle); + + memset(vbuf, 0, 
sizeof vbuf); + + vbuf.stride = sizeof( struct vertex ); + vbuf.buffer_offset = 0; + vbuf.buffer = pipe_buffer_create_with_data(info.ctx, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, + sizeof(vertices), + vertices); + + info.ctx-set_vertex_buffers(info.ctx, 0, 1, vbuf); +} + + +static void set_vertex_shader( void ) +{ + void *handle; + const char *text = + VERT\n + DCL IN[0]\n + DCL IN[1]\n + DCL OUT[0], POSITION\n + DCL OUT[1], COLOR\n +0: MOV OUT[1], IN[1]\n +1: MOV OUT[0], IN[0]\n +2: END\n; + + handle = graw_parse_vertex_shader(info.ctx, text); + info.ctx-bind_vs_state(info.ctx, handle); +} + + +static void set_fragment_shader( void ) +{ + void *handle; + const char *text = + FRAG\n + DCL IN[0], COLOR, LINEAR\n + DCL OUT[0], COLOR\n +0: MOV OUT[0], IN[0]\n +1: END\n; + + handle = graw_parse_fragment_shader(info.ctx, text); + info.ctx-bind_fs_state(info.ctx, handle); +} + + +static void draw( void ) +{ + union pipe_color_union clear_color = { {1,0,1,1} }; + + info.ctx-clear(info.ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0); + util_draw_arrays(info.ctx, PIPE_PRIM_TRIANGLES, 0, 3); + info.ctx-flush(info.ctx, NULL, 0); + + graw_save_surface_to_file(info.ctx, info.color_surf[0], NULL); + + graw_util_flush_front(info); +} + + +static void init( void ) +{ + if (!graw_util_create_window(info, WIDTH, HEIGHT, 1, FALSE)) + exit(1); + + graw_util_default_state(info, FALSE); + + { + struct pipe_rasterizer_state rasterizer; + void *handle; + memset(rasterizer, 0, sizeof rasterizer); + rasterizer.cull_face = PIPE_FACE_NONE; + rasterizer.half_pixel_center = 1; + rasterizer.bottom_edge_rule = 1; + rasterizer.flatshade = FlatShade; + rasterizer.depth_clip = 1; + handle = info.ctx-create_rasterizer_state(info.ctx, rasterizer); + info.ctx-bind_rasterizer_state(info.ctx, handle); + } + + + graw_util_viewport(info, 0, 0, WIDTH, HEIGHT, 30, 1000); + + set_vertices(); + set_vertex_shader(); + set_fragment_shader(); +} + +static void args(int argc, char *argv[]) +{ + int i; + + for (i 
= 1; i argc; ) { + if (graw_parse_args(i, argc, argv)) { + /* ok */ + } + else if (strcmp(argv[i], -f) == 0) { + FlatShade = TRUE; + i++; + } + else { + printf(Invalid arg %s\n, argv[i]); + exit(1); + } + } +} + +int main( int argc, char *argv[] ) +{ + args(argc, argv); + init(); + + graw_set_display_func( draw ); + graw_main_loop(); + return 0; +} -- 1.8.3.2