VERSION | 2 bin/bugzilla_mesa.sh | 13 configure.ac | 24 debian/changelog | 6 docs/relnotes/10.6.3.html | 3 docs/relnotes/10.6.4.html | 136 ++++ include/GL/internal/dri_interface.h | 8 src/egl/drivers/dri2/egl_dri2.c | 6 src/egl/main/eglsurface.c | 3 src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 5 src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 2 src/gallium/drivers/r600/evergreen_state.c | 8 src/gallium/drivers/r600/r600_shader.c | 1 src/gallium/drivers/radeonsi/si_descriptors.c | 548 +++++++---------- src/gallium/drivers/radeonsi/si_pipe.h | 11 src/gallium/drivers/radeonsi/si_shader.c | 49 - src/gallium/drivers/radeonsi/si_shader.h | 1 src/gallium/drivers/radeonsi/si_state.h | 45 - src/gallium/drivers/radeonsi/si_state_draw.c | 10 src/gallium/drivers/radeonsi/si_state_shaders.c | 12 src/gallium/targets/opencl/Makefile.am | 2 src/gallium/targets/opencl/mesa.icd | 1 src/gallium/targets/opencl/mesa.icd.in | 1 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 4 src/glsl/glcpp/glcpp-parse.y | 4 src/glsl/ir.cpp | 4 src/mesa/drivers/common/meta_tex_subimage.c | 80 ++ src/mesa/drivers/dri/i965/brw_device_info.c | 10 src/mesa/drivers/dri/i965/brw_fs.cpp | 1 src/mesa/main/readpix.c | 34 - src/mesa/main/readpix.h | 10 src/mesa/main/shaderapi.c | 17 src/mesa/main/teximage.c | 47 - src/mesa/state_tracker/st_atom_texture.c | 6 src/mesa/state_tracker/st_cb_blit.c | 4 src/mesa/state_tracker/st_context.c | 1 src/mesa/state_tracker/st_context.h | 2 37 files changed, 657 insertions(+), 464 deletions(-)
New commits: commit 771d55b296899b25070287ce10bd16a73c671ab3 Author: Timo Aaltonen <[email protected]> Date: Wed Aug 12 14:59:36 2015 +0300 update the changelog diff --git a/debian/changelog b/debian/changelog index bed0c7c..40b1b68 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +mesa (10.6.4-1) UNRELEASED; urgency=medium + + * New upstream release. + + -- Timo Aaltonen <[email protected]> Wed, 12 Aug 2015 13:09:40 +0300 + mesa (10.6.3-1) unstable; urgency=medium * New upstream release. commit 6b2fcee64edadbd4db2293f5f4fc1a70e80c7251 Author: Emil Velikov <[email protected]> Date: Tue Aug 11 16:39:10 2015 +0100 docs: add release notes for 10.6.4 Signed-off-by: Emil Velikov <[email protected]> diff --git a/docs/relnotes/10.6.4.html b/docs/relnotes/10.6.4.html new file mode 100644 index 0000000..b330b87 --- /dev/null +++ b/docs/relnotes/10.6.4.html @@ -0,0 +1,136 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html lang="en"> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <title>Mesa Release Notes</title> + <link rel="stylesheet" type="text/css" href="../mesa.css"> +</head> +<body> + +<div class="header"> + <h1>The Mesa 3D Graphics Library</h1> +</div> + +<iframe src="../contents.html"></iframe> +<div class="content"> + +<h1>Mesa 10.6.4 Release Notes / August 11, 2015</h1> + +<p> +Mesa 10.6.4 is a bug fix release which fixes bugs found since the 10.6.3 release. +</p> +<p> +Mesa 10.6.4 implements the OpenGL 3.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 3.3. OpenGL +3.3 is <strong>only</strong> available if requested at context creation +because compatibility contexts are not supported. 
+</p> + + +<h2>SHA256 checksums</h2> +<pre> +TBD +</pre> + + +<h2>New features</h2> +<p>None</p> + +<h2>Bug fixes</h2> + +<p>This list is likely incomplete.</p> +<ul> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73512">Bug 73512</a> - [clover] mesa.icd. should contain full path</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91290">Bug 91290</a> - SIGSEGV glcpp/glcpp-parse.y:1077</li> + +</ul> + + +<h2>Changes</h2> + +<p>Anuj Phogat (6):</p> +<ul> + <li>mesa: Turn get_readpixels_transfer_ops() in to a global function</li> + <li>meta: Fix transfer operations check in meta pbo path for readpixels</li> + <li>meta: Abort meta pbo path if readpixels need signed-unsigned conversion</li> + <li>meta: Don't do fragment color clamping in _mesa_meta_pbo_GetTexSubImage</li> + <li>mesa: Add a helper function _mesa_need_luminance_to_rgb_conversion()</li> + <li>meta: Fix reading luminance texture as rgba in _mesa_meta_pbo_GetTexSubImage()</li> +</ul> + +<p>Ben Widawsky (1):</p> +<ul> + <li>i965/skl: Add production thread counts and URB size</li> +</ul> + +<p>Eduardo Lima Mitev (3):</p> +<ul> + <li>mesa: Fix errors values returned by glShaderBinary()</li> + <li>mesa: Validate target before resolving tex obj in glTex(ture)SubImageXD</li> + <li>mesa: Fix error returned by glCopyTexImage2D() upon an invalid internal format</li> +</ul> + +<p>Emil Velikov (6):</p> +<ul> + <li>docs: Add checksums for mesa 10.6.3 tarballs</li> + <li>configure.ac: do not set HAVE_DRI(23) when libdrm is missing</li> + <li>egl/wayland: libdrm is a hard requirement, treat it as such</li> + <li>winsys/radeon: don't leak the fd when it is 0</li> + <li>bugzilla_mesa.sh: sort the bugs list by number</li> + <li>Update version to 10.6.4</li> +</ul> + +<p>Francisco Jerez (1):</p> +<ul> + <li>i965/fs: Fix fs_inst::regs_read() for sources in the ATTR file.</li> +</ul> + +<p>Frank Binns (2):</p> +<ul> + <li>egl/dri: Add error info needed for EGL_EXT_image_dma_buf_import extension</li> 
+ <li>egl: Add eglQuerySurface surface type check for EGL_LARGEST_PBUFFER attrib</li> +</ul> + +<p>Igor Gnatenko (1):</p> +<ul> + <li>opencl: use versioned .so in mesa.icd</li> +</ul> + +<p>Ilia Mirkin (1):</p> +<ul> + <li>nvc0: fix geometry program revalidation of clipping params</li> +</ul> + +<p>Kenneth Graunke (1):</p> +<ul> + <li>glsl: Fix a bug where LHS swizzles of swizzles were too small.</li> +</ul> + +<p>Marek Olšák (6):</p> +<ul> + <li>st/mesa: don't call st_validate_state in BlitFramebuffer</li> + <li>radeonsi: upload shader rodata after updating scratch relocations</li> + <li>st/mesa: don't ignore texture buffer state changes</li> + <li>radeonsi: rework how shader pointers to descriptors are set</li> + <li>radeonsi: completely rework updating descriptors without CP DMA</li> + <li>r600g: fix the CB_SHADER_MASK setup</li> +</ul> + +<p>Samuel Iglesias Gonsalvez (1):</p> +<ul> + <li>glsl/glcpp: fix SIGSEGV when checking error condition for macro redefinition</li> +</ul> + +<p>Samuel Pitoiset (1):</p> +<ul> + <li>nv50: avoid segfault with enabled but unbound vertex attrib</li> +</ul> + + +</div> +</body> +</html> commit 95ecedf6d9af87b98aa07112048f495c964bd4cf Author: Emil Velikov <[email protected]> Date: Tue Aug 11 16:35:06 2015 +0100 Update version to 10.6.4 Signed-off-by: Emil Velikov <[email protected]> diff --git a/VERSION b/VERSION index 15c6f3e..827886a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -10.6.3 +10.6.4 commit 736f6e16d9989f01cc55dcba15ba978ba90b7748 Author: Francisco Jerez <[email protected]> Date: Wed Aug 5 16:29:30 2015 +0300 i965/fs: Fix fs_inst::regs_read() for sources in the ATTR file. Otherwise it would crash on Gen8 with scalar VS. The issue can easily be reproduced with the following patch, but I don't see any reason why it wouldn't be possible to end up with an ATTR argument here even without it. 
CC: [email protected] Reviewed-by: Connor Abbott <[email protected]> Reviewed-by: Jason Ekstrand <[email protected]> (cherry picked from commit 42a18ca76057621ae7d8812b29ea2245d6ff282d) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 502ca55..d99c79b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -973,6 +973,7 @@ fs_inst::regs_read(int arg) const case IMM: return 1; case GRF: + case ATTR: case HW_REG: if (src[arg].stride == 0) { return 1; commit f13ba8a5ab537e6dcdcc8b0c1a814012202d2497 Author: Eduardo Lima Mitev <[email protected]> Date: Wed Jul 29 16:01:28 2015 +0200 mesa: Fix error returned by glCopyTexImage2D() upon an invalid internal format Page 161 of the OpenGL-ES 3.1 (PDF) spec, and page 207 of the OpenGL 4.5 (PDF), both in section '8.6. ALTERNATE TEXTURE IMAGE SPECIFICATION COMMANDS', state: "An INVALID_ENUM error is generated if an invalid value is specified for internalformat". It is currently returning an INVALID_OPERATION error because _mesa_get_read_renderbuffer_for_format() is called before the internalformat argument has been validated. To fix this, we move this call down the validation process, after _mesa_base_tex_format() has been called. _mesa_base_tex_format() effectively serves as a validator for the internal format.
Fixes 1 dEQP test: * dEQP-GLES3.functional.negative_api.texture.copyteximage2d_invalid_format Fixes 1 piglit test: * spec@oes_compressed_etc1_rgb8_texture@basic Reviewed-by: Ian Romanick <[email protected]> Cc: 10.6 <[email protected]> (cherry picked from commit 4b07e9a033ddb6733eba206b5bd47a2373756f7d) diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 3261816..959cb9c 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -2622,13 +2622,6 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, return GL_TRUE; } - rb = _mesa_get_read_renderbuffer_for_format(ctx, internalFormat); - if (rb == NULL) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glCopyTexImage%dD(read buffer)", dimensions); - return GL_TRUE; - } - /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the * internalFormat. */ @@ -2641,7 +2634,7 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, case GL_LUMINANCE_ALPHA: break; default: - _mesa_error(ctx, GL_INVALID_VALUE, + _mesa_error(ctx, GL_INVALID_ENUM, "glCopyTexImage%dD(internalFormat=%s)", dimensions, _mesa_lookup_enum_by_nr(internalFormat)); return GL_TRUE; @@ -2650,12 +2643,19 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, baseFormat = _mesa_base_tex_format(ctx, internalFormat); if (baseFormat < 0) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, GL_INVALID_ENUM, "glCopyTexImage%dD(internalFormat=%s)", dimensions, _mesa_lookup_enum_by_nr(internalFormat)); return GL_TRUE; } + rb = _mesa_get_read_renderbuffer_for_format(ctx, internalFormat); + if (rb == NULL) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glCopyTexImage%dD(read buffer)", dimensions); + return GL_TRUE; + } + rb_internal_format = rb->InternalFormat; rb_base_format = _mesa_base_tex_format(ctx, rb->InternalFormat); if (_mesa_is_color_format(internalFormat)) { commit 791cf8a025ac0d610596cdfab17fc84b49df2288 Author: Eduardo Lima Mitev <[email protected]> Date: Wed 
Jul 29 16:01:26 2015 +0200 mesa: Validate target before resolving tex obj in glTex(ture)SubImageXD Currently, glTexSubImageXD attempts to resolve the texture object (by calling _mesa_get_current_tex_object()) before validating the given target. However, that method explicitly states that target must have been validated before calling it, so it never returns a user error. The target validation occurs later when texsubimage_error_check() is called. This patch reorganizes target validation, taking it out from the error check function and into a point before the texture object is resolved. Reviewed-by: Ian Romanick <[email protected]> Cc: 10.6 <[email protected]> (cherry picked from commit 5d64cae8427b090c42d6d38da7fb474b3ddd4eb0) [Emil Velikov: s/_mesa_enum_to_string/_mesa_lookup_enum_by_nr/] Signed-off-by: Emil Velikov <[email protected]> Conflicts: src/mesa/main/teximage.c diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 7bc1da7..3261816 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -2479,13 +2479,6 @@ texsubimage_error_check(struct gl_context *ctx, GLuint dimensions, return GL_TRUE; } - /* check target (proxies not allowed) */ - if (!legal_texsubimage_target(ctx, dimensions, target, dsa)) { - _mesa_error(ctx, GL_INVALID_ENUM, "%s(target=%s)", - callerName, _mesa_lookup_enum_by_nr(target)); - return GL_TRUE; - } - /* level check */ if (level < 0 || level >= _mesa_max_texture_levels(ctx, target)) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(level=%d)", callerName, level); @@ -3515,14 +3508,6 @@ _mesa_texture_sub_image(struct gl_context *ctx, GLuint dims, { FLUSH_VERTICES(ctx, 0); - /* check target (proxies not allowed) */ - if (!legal_texsubimage_target(ctx, dims, target, dsa)) { - _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sSubImage%uD(target=%s)", - dsa ? 
"ture" : "", - dims, _mesa_lookup_enum_by_nr(target)); - return; - } - if (ctx->NewState & _NEW_PIXEL) _mesa_update_state(ctx); @@ -3572,6 +3557,13 @@ texsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level, struct gl_texture_object *texObj; struct gl_texture_image *texImage; + /* check target (proxies not allowed) */ + if (!legal_texsubimage_target(ctx, dims, target, false)) { + _mesa_error(ctx, GL_INVALID_ENUM, "glTexSubImage%uD(target=%s)", + dims, _mesa_lookup_enum_by_nr(target)); + return; + } + texObj = _mesa_get_current_tex_object(ctx, target); if (!texObj) return; @@ -3632,6 +3624,13 @@ texturesubimage(struct gl_context *ctx, GLuint dims, return; } + /* check target (proxies not allowed) */ + if (!legal_texsubimage_target(ctx, dims, texObj->Target, true)) { + _mesa_error(ctx, GL_INVALID_ENUM, "%s(target=%s)", + callerName, _mesa_lookup_enum_by_nr(texObj->Target)); + return; + } + if (texsubimage_error_check(ctx, dims, texObj, texObj->Target, level, xoffset, yoffset, zoffset, width, height, depth, format, type, commit 58b2e95c1f09aafdfe5434f433ed7dc7a628e7a8 Author: Eduardo Lima Mitev <[email protected]> Date: Wed Jul 29 16:01:23 2015 +0200 mesa: Fix errors values returned by glShaderBinary() Page 68, section 7.2 'Shader Binaries" of the of the OpenGL ES 3.1, and page 88 of the OpenGL 4.5 specs state: "An INVALID_VALUE error is generated if count or length is negative. An INVALID_ENUM error is generated if binaryformat is not a supported format returned in SHADER_BINARY_FORMATS." Currently, an INVALID_OPERATION error is returned for all cases. 
Fixes 1 dEQP test: * dEQP-GLES3.functional.negative_api.shader.shader_binary Reviewed-by: Ian Romanick <[email protected]> Cc: 10.6 <[email protected]> (cherry picked from commit b38a50f1e3edae6079c91f73a8d9c63a2dbf512a) diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index a04b287..d9834c8 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -1692,12 +1692,23 @@ _mesa_ShaderBinary(GLint n, const GLuint* shaders, GLenum binaryformat, const void* binary, GLint length) { GET_CURRENT_CONTEXT(ctx); - (void) n; (void) shaders; (void) binaryformat; (void) binary; - (void) length; - _mesa_error(ctx, GL_INVALID_OPERATION, "glShaderBinary"); + + /* Page 68, section 7.2 'Shader Binaries" of the of the OpenGL ES 3.1, and + * page 88 of the OpenGL 4.5 specs state: + * + * "An INVALID_VALUE error is generated if count or length is negative. + * An INVALID_ENUM error is generated if binaryformat is not a supported + * format returned in SHADER_BINARY_FORMATS." + */ + if (n < 0 || length < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, "glShaderBinary(count or length < 0)"); + return; + } + + _mesa_error(ctx, GL_INVALID_ENUM, "glShaderBinary(format)"); } commit 1f6798a70a6d7e6db636decc6af752f9a7714906 Author: Frank Binns <[email protected]> Date: Fri Jul 31 09:11:45 2015 +0100 egl: Add eglQuerySurface surface type check for EGL_LARGEST_PBUFFER attrib Calling eglQuerySurface on a window or pixmap with the EGL_LARGEST_PBUFFER attribute resulted in the contents of the 'value' parameter being modified. This is the wrong behaviour according to the EGL spec, which states: "Querying EGL_LARGEST_PBUFFER for a pbuffer surface returns the same attribute value specified when the surface was created with eglCreatePbufferSurface. For a window or pixmap surface, the contents of value are not modified." Avoid this from happening by checking that the surface type is EGL_PBUFFER_BIT before modifying the contents of the parameter. 
Cc: <[email protected]> Signed-off-by: Frank Binns <[email protected]> Reviewed-by: Emil Velikov <[email protected]> Reviewed-by: Eric Anholt <[email protected]> (cherry picked from commit b2c5986ea1c8e66c4e0a05bcacbcf28c27f5b183) diff --git a/src/egl/main/eglsurface.c b/src/egl/main/eglsurface.c index 76c60e9..013b2eb 100644 --- a/src/egl/main/eglsurface.c +++ b/src/egl/main/eglsurface.c @@ -309,7 +309,8 @@ _eglQuerySurface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface, *value = surface->Config->ConfigID; break; case EGL_LARGEST_PBUFFER: - *value = surface->LargestPbuffer; + if (surface->Type == EGL_PBUFFER_BIT) + *value = surface->LargestPbuffer; break; case EGL_TEXTURE_FORMAT: /* texture attributes: only for pbuffers, no error otherwise */ commit 84ef345dffec02d790db13fd6257e2c08eb0d56a Author: Frank Binns <[email protected]> Date: Fri Jul 31 09:11:46 2015 +0100 egl/dri: Add error info needed for EGL_EXT_image_dma_buf_import extension Update the DRI image interface error codes to reflect the needs of the EGL_EXT_image_dma_buf_import extension. This means updating the existing error code documentation and adding a new __DRI_IMAGE_ERROR_BAD_ACCESS error code so that drivers can correctly reject unsupported pitches and offsets. Hook the new error code up in EGL to return EGL_BAD_ACCESS. 
Cc: <[email protected]> Signed-off-by: Frank Binns <[email protected]> Reviewed-by: Emil Velikov <[email protected]> Reviewed-by: Eric Anholt <[email protected]> (cherry picked from commit cfc3200a35647026a0b5cf188f378ce33802044b) diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index c827bb6..6674af0 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -1180,7 +1180,8 @@ enum __DRIChromaSiting { }; /** - * \name Reasons that __DRIimageExtensionRec::createImageFromTexture might fail + * \name Reasons that __DRIimageExtensionRec::createImageFromTexture or + * __DRIimageExtensionRec::createImageFromDmaBufs might fail */ /*@{*/ /** Success! */ @@ -1189,11 +1190,14 @@ enum __DRIChromaSiting { /** Memory allocation failure */ #define __DRI_IMAGE_ERROR_BAD_ALLOC 1 -/** Client requested an invalid attribute for a texture object */ +/** Client requested an invalid attribute */ #define __DRI_IMAGE_ERROR_BAD_MATCH 2 /** Client requested an invalid texture object */ #define __DRI_IMAGE_ERROR_BAD_PARAMETER 3 + +/** Client requested an invalid pitch and/or offset */ +#define __DRI_IMAGE_ERROR_BAD_ACCESS 4 /*@}*/ /** diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index c33efd7..3837eac 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -1510,6 +1510,10 @@ dri2_create_image_khr_texture_error(int dri_error) egl_error = EGL_BAD_PARAMETER; break; + case __DRI_IMAGE_ERROR_BAD_ACCESS: + egl_error = EGL_BAD_ACCESS; + break; + default: assert(0); egl_error = EGL_BAD_MATCH; commit 7722a24cab4b8880d45fb723205e2eedfada2055 Author: Marek Olšák <[email protected]> Date: Sun Aug 2 15:18:36 2015 +0200 r600g: fix the CB_SHADER_MASK setup This fixes the single-sample fast clear hang. 
Cc: 10.6 <[email protected]> Tested-by: Dieter Nützel <[email protected]> Reviewed-by: Dave Airlie <[email protected]> (cherry picked from commit d4ad4c20617f45f71152e292ee39f020ef352bfd) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 4ddbc0b..3256332 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1732,10 +1732,10 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_ r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2); radeon_emit(cs, a->blend_colormask & fb_colormask); /* R_028238_CB_TARGET_MASK */ - /* Always enable the first colorbuffer in CB_SHADER_MASK. This - * will assure that the alpha-test will work even if there is - * no colorbuffer bound. */ - radeon_emit(cs, 0xf | (a->dual_src_blend ? ps_colormask : 0) | fb_colormask); /* R_02823C_CB_SHADER_MASK */ + /* This must match the used export instructions exactly. + * Other values may lead to undefined behavior and hangs. + */ + radeon_emit(cs, ps_colormask); /* R_02823C_CB_SHADER_MASK */ } static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 87b6e6e..248712f 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -2482,6 +2482,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, output[j].array_base = 0; output[j].op = CF_OP_EXPORT; j++; + shader->nr_ps_color_exports++; } noutput = j; commit 880a0ce2e973d5ed9ee28c3f48fc5332128f9652 Author: Marek Olšák <[email protected]> Date: Sat Jul 25 00:53:16 2015 +0200 radeonsi: completely rework updating descriptors without CP DMA For 10.6: This fixes graphical corruption occurring on most Southern Islands Radeon GPUs. This will allow closing a lot of bugs in the bugzilla. The patch has a better explanation.
Just a summary here: - The CPU always uploads a whole descriptor array to previously-unused memory. - CP DMA isn't used. - No caches need to be flushed. - All descriptors are always up-to-date in memory even after a hang, because CP DMA doesn't serve as a middle man to update them. This should bring: - better hang recovery (descriptors are always up-to-date) - better GPU performance (no KCACHE and TC flushes) - worse CPU performance for partial updates (only whole arrays are uploaded) - less used IB space (no CP_DMA and WRITE_DATA packets) - simpler code - corruption issues are fixed on SI cards Reviewed-by: Michel Dänzer <[email protected]> (cherry picked from commit b0528118dfb1af00e7d08cdb637191b80c14c2ba) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index f31cccb..f6d2b2c 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -24,14 +24,23 @@ * Marek Olšák <[email protected]> */ -/* Resource binding slots and sampler states (each described with 8 or 4 dwords) - * live in memory on SI. +/* Resource binding slots and sampler states (each described with 8 or + * 4 dwords) are stored in lists in memory which is accessed by shaders + * using scalar load instructions. * - * This file is responsible for managing lists of resources and sampler states - * in memory and binding them, which means updating those structures in memory. + * This file is responsible for managing such lists. It keeps a copy of all + * descriptors in CPU memory and re-uploads a whole list if some slots have + * been changed. * - * There is also code for updating shader pointers to resources and sampler - * states. CP DMA functions are here too. + * This code is also reponsible for updating shader pointers to those lists. 
+ * + * Note that CP DMA can't be used for updating the lists, because a GPU hang + * could leave the list in a mid-IB state and the next IB would get wrong + * descriptors and the whole context would be unusable at that point. + * (Note: The register shadowing can't be used due to the same reason) + * + * Also, uploading descriptors to newly allocated memory doesn't require + * a KCACHE flush. */ #include "radeon/r600_cs.h" @@ -42,7 +51,6 @@ #include "util/u_memory.h" #include "util/u_upload_mgr.h" -#define SI_NUM_CONTEXTS 16 /* NULL image and buffer descriptor. * @@ -139,159 +147,62 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx, } } -static void si_init_descriptors(struct si_context *sctx, - struct si_descriptors *desc, +static void si_init_descriptors(struct si_descriptors *desc, unsigned shader_userdata_index, unsigned element_dw_size, - unsigned num_elements, - void (*emit_func)(struct si_context *ctx, struct r600_atom *state)) + unsigned num_elements) { + int i; + assert(num_elements <= sizeof(desc->enabled_mask)*8); - assert(num_elements <= sizeof(desc->dirty_mask)*8); - desc->atom.emit = (void*)emit_func; - desc->shader_userdata_offset = shader_userdata_index * 4; + desc->list = CALLOC(num_elements, element_dw_size * 4); desc->element_dw_size = element_dw_size; desc->num_elements = num_elements; - desc->context_size = num_elements * element_dw_size * 4; - - desc->buffer = (struct r600_resource*) - pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM, - PIPE_USAGE_DEFAULT, - SI_NUM_CONTEXTS * desc->context_size); - - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer, - RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); + desc->list_dirty = true; /* upload the list before the next draw */ + desc->shader_userdata_offset = shader_userdata_index * 4; - /* We don't check for CS space here, because this should be called - * only once at context initialization. 
*/ - si_emit_cp_dma_clear_buffer(sctx, desc->buffer->gpu_address, - desc->buffer->b.b.width0, 0, - R600_CP_DMA_SYNC | CIK_CP_DMA_USE_L2); + /* Initialize the array to NULL descriptors if the element size is 8. */ + if (element_dw_size == 8) + for (i = 0; i < num_elements; i++) + memcpy(desc->list + i*element_dw_size, null_descriptor, + sizeof(null_descriptor)); } static void si_release_descriptors(struct si_descriptors *desc) { pipe_resource_reference((struct pipe_resource**)&desc->buffer, NULL); + FREE(desc->list); } -static void si_update_descriptors(struct si_context *sctx, +static bool si_upload_descriptors(struct si_context *sctx, struct si_descriptors *desc) { - if (desc->dirty_mask) { - desc->atom.num_dw = - 7 + /* copy */ - (4 + desc->element_dw_size) * util_bitcount(desc->dirty_mask); /* update */ - - desc->atom.dirty = true; - desc->pointer_dirty = true; - sctx->shader_userdata.atom.dirty = true; - - /* TODO: Investigate if these flushes can be removed after - * adding CE support. */ - - /* The descriptors are read with the K cache. */ - sctx->b.flags |= SI_CONTEXT_INV_KCACHE; - - /* Since SI uses uncached CP DMA to update descriptors, - * we have to flush TC L2, which is used to fetch constants - * along with KCACHE. 
*/ - if (sctx->b.chip_class == SI) - sctx->b.flags |= SI_CONTEXT_INV_TC_L2; - } else { - desc->atom.dirty = false; - } -} + unsigned list_size = desc->num_elements * desc->element_dw_size * 4; + void *ptr; -static void si_emit_descriptors(struct si_context *sctx, - struct si_descriptors *desc, - uint32_t **descriptors) -{ - struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - uint64_t va_base; - int packet_start = 0; - int packet_size = 0; - int last_index = desc->num_elements; /* point to a non-existing element */ - uint64_t dirty_mask = desc->dirty_mask; - unsigned new_context_id = (desc->current_context_id + 1) % SI_NUM_CONTEXTS; - - assert(dirty_mask); - - va_base = desc->buffer->gpu_address; - - /* Copy the descriptors to a new context slot. */ - si_emit_cp_dma_copy_buffer(sctx, - va_base + new_context_id * desc->context_size, - va_base + desc->current_context_id * desc->context_size, - desc->context_size, R600_CP_DMA_SYNC | CIK_CP_DMA_USE_L2); - - va_base += new_context_id * desc->context_size; - - /* Update the descriptors. - * Updates of consecutive descriptors are merged to one WRITE_DATA packet. - * - * XXX When unbinding lots of resources, consider clearing the memory - * with CP DMA instead of emitting zeros. - */ - while (dirty_mask) { - int i = u_bit_scan64(&dirty_mask); - - assert(i < desc->num_elements); + if (!desc->list_dirty) + return true; - if (last_index+1 == i && packet_size) { - /* Append new data at the end of the last packet. */ - packet_size += desc->element_dw_size; - cs->buf[packet_start] = PKT3(PKT3_WRITE_DATA, packet_size, 0); - } else { - /* Start a new packet. */ - uint64_t va = va_base + i * desc->element_dw_size * 4; - - packet_start = cs->cdw; - packet_size = 2 + desc->element_dw_size; - - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, packet_size, 0)); - radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(sctx->b.chip_class == SI ? 
- PKT3_WRITE_DATA_DST_SEL_MEM_SYNC : - PKT3_WRITE_DATA_DST_SEL_TC_L2) | - PKT3_WRITE_DATA_WR_CONFIRM | - PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME)); - radeon_emit(cs, va & 0xFFFFFFFFUL); - radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL); - } + u_upload_alloc(sctx->b.uploader, 0, list_size, + &desc->buffer_offset, + (struct pipe_resource**)&desc->buffer, &ptr); + if (!desc->buffer) + return false; /* skip the draw call */ - radeon_emit_array(cs, descriptors[i], desc->element_dw_size); + util_memcpy_cpu_to_le32(ptr, desc->list, list_size); - last_index = i; - } + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer, + RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); - desc->dirty_mask = 0; - desc->current_context_id = new_context_id; + desc->list_dirty = false; + desc->pointer_dirty = true; + sctx->shader_userdata.atom.dirty = true; + return true; } /* SAMPLER VIEWS */ -static void si_emit_sampler_views(struct si_context *sctx, struct r600_atom *atom) -{ - struct si_sampler_views *views = (struct si_sampler_views*)atom; - - si_emit_descriptors(sctx, &views->desc, views->desc_data); -} - -static void si_init_sampler_views(struct si_context *sctx, - struct si_sampler_views *views) -{ - int i; - - si_init_descriptors(sctx, &views->desc, SI_SGPR_RESOURCE, - 8, SI_NUM_SAMPLER_VIEWS, si_emit_sampler_views); - - for (i = 0; i < views->desc.num_elements; i++) { - views->desc_data[i] = null_descriptor; - views->desc.dirty_mask |= 1llu << i; - } - si_update_descriptors(sctx, &views->desc); -} - static void si_release_sampler_views(struct si_sampler_views *views) { int i; @@ -332,6 +243,8 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx, si_get_resource_ro_priority(rview->resource)); } + if (!views->desc.buffer) + return; r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); } @@ -354,17 +267,16 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader, 
rview->resource, RADEON_USAGE_READ, si_get_resource_ro_priority(rview->resource)); - pipe_sampler_view_reference(&views->views[slot], view); - views->desc_data[slot] = view_desc; + memcpy(views->desc.list + slot*8, view_desc, 8*4); views->desc.enabled_mask |= 1llu << slot; } else { pipe_sampler_view_reference(&views->views[slot], NULL); - views->desc_data[slot] = null_descriptor; + memcpy(views->desc.list + slot*8, null_descriptor, 8*4); views->desc.enabled_mask &= ~(1llu << slot); } - views->desc.dirty_mask |= 1llu << slot; + views->desc.list_dirty = true; } static void si_set_sampler_views(struct pipe_context *ctx, @@ -423,22 +335,15 @@ static void si_set_sampler_views(struct pipe_context *ctx, NULL, NULL); } } - - si_update_descriptors(sctx, &samplers->views.desc); } /* SAMPLER STATES */ -static void si_emit_sampler_states(struct si_context *sctx, struct r600_atom *atom) -{ - struct si_sampler_states *states = (struct si_sampler_states*)atom; - - si_emit_descriptors(sctx, &states->desc, states->desc_data); -} - static void si_sampler_states_begin_new_cs(struct si_context *sctx, struct si_sampler_states *states) { + if (!states->desc.buffer) + return; r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); } @@ -460,64 +365,39 @@ void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader, for (i = 0; i < count; i++) { unsigned slot = start + i; - if (!sstates[i]) { - samplers->desc.dirty_mask &= ~(1llu << slot); + if (!sstates[i]) continue; - } - samplers->desc_data[slot] = sstates[i]->val; - samplers->desc.dirty_mask |= 1llu << slot; + memcpy(samplers->desc.list + slot*4, sstates[i]->val, 4*4); + samplers->desc.list_dirty = true; } - - si_update_descriptors(sctx, &samplers->desc); } /* BUFFER RESOURCES */ -static void si_emit_buffer_resources(struct si_context *sctx, struct r600_atom *atom) -{ - struct si_buffer_resources *buffers = (struct si_buffer_resources*)atom; - - 
si_emit_descriptors(sctx, &buffers->desc, buffers->desc_data); -} - -static void si_init_buffer_resources(struct si_context *sctx, - struct si_buffer_resources *buffers, +static void si_init_buffer_resources(struct si_buffer_resources *buffers, unsigned num_buffers, unsigned shader_userdata_index, enum radeon_bo_usage shader_usage, enum radeon_bo_priority priority) { - int i; - - buffers->num_buffers = num_buffers; buffers->shader_usage = shader_usage; buffers->priority = priority; buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*)); - buffers->desc_storage = CALLOC(num_buffers, sizeof(uint32_t) * 4); - /* si_emit_descriptors only accepts an array of arrays. - * This adds such an array. */ - buffers->desc_data = CALLOC(num_buffers, sizeof(uint32_t*)); - for (i = 0; i < num_buffers; i++) { - buffers->desc_data[i] = &buffers->desc_storage[i*4]; - } - - si_init_descriptors(sctx, &buffers->desc, shader_userdata_index, 4, - num_buffers, si_emit_buffer_resources); + si_init_descriptors(&buffers->desc, shader_userdata_index, 4, + num_buffers); } static void si_release_buffer_resources(struct si_buffer_resources *buffers) { int i; - for (i = 0; i < buffers->num_buffers; i++) { + for (i = 0; i < buffers->desc.num_elements; i++) {

