[Mesa-dev] [RFC 06/10] glsl: create shader sha1 if disk cache or blob cache is active
Signed-off-by: Tapani Pälli--- src/compiler/glsl/glsl_parser_extras.cpp | 26 ++ src/util/disk_cache.c| 5 +++-- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index 59821732b9..6003ddb277 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -2038,22 +2038,24 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, shader->FallbackSource : shader->Source; if (!force_recompile) { - if (ctx->Cache) { - char buf[41]; + /* Compute sha1 if using disk cache or ANDROID blob cache. */ + if (ctx->Cache || ctx->blobCacheSet) { disk_cache_compute_key(ctx->Cache, source, strlen(source), shader->sha1); - if (disk_cache_has_key(ctx->Cache, shader->sha1)) { -/* We've seen this shader before and know it compiles */ -if (ctx->_Shader->Flags & GLSL_CACHE_INFO) { - _mesa_sha1_format(buf, shader->sha1); - fprintf(stderr, "deferring compile of shader: %s\n", buf); -} -shader->CompileStatus = compile_skipped; + } -free((void *)shader->FallbackSource); -shader->FallbackSource = NULL; -return; + if (ctx->Cache && disk_cache_has_key(ctx->Cache, shader->sha1)) { + /* We've seen this shader before and know it compiles */ + if (ctx->_Shader->Flags & GLSL_CACHE_INFO) { +char buf[41]; +_mesa_sha1_format(buf, shader->sha1); +fprintf(stderr, "deferring compile of shader: %s\n", buf); } + shader->CompileStatus = compile_skipped; + + free((void *)shader->FallbackSource); + shader->FallbackSource = NULL; + return; } } else { /* We should only ever end up here if a re-compile has been forced by a diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c index 7ebfa8c045..d8e92d66fe 100644 --- a/src/util/disk_cache.c +++ b/src/util/disk_cache.c @@ -1203,8 +1203,9 @@ disk_cache_compute_key(struct disk_cache *cache, const void *data, size_t size, struct mesa_sha1 ctx; _mesa_sha1_init(); - _mesa_sha1_update(, cache->driver_keys_blob, 
- cache->driver_keys_blob_size); + if (cache) + _mesa_sha1_update(, cache->driver_keys_blob, +cache->driver_keys_blob_size); _mesa_sha1_update(, data, size); _mesa_sha1_final(, key); } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 10/10] egl: XXX debug framework for EGL_ANDROID_blob_cache
Patch disables EGL_ANDROID_blob_cache and sets own get/set cache functions internally, this makes any EGL app to utilize cache automatically. Signed-off-by: Tapani Pälli--- src/egl/drivers/dri2/egl_dri2.c | 3 -- src/egl/main/eglapi.c | 77 + 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index f9d0223fe2..e24dab1589 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -728,9 +728,6 @@ dri2_setup_screen(_EGLDisplay *disp) } } - if (dri2_dpy->blob) - disp->Extensions.ANDROID_blob_cache = EGL_TRUE; - disp->Extensions.KHR_reusable_sync = EGL_TRUE; if (dri2_dpy->image) { diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index b8d64a913c..5cd70c1afe 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -790,6 +790,80 @@ eglDestroyContext(EGLDisplay dpy, EGLContext ctx) RETURN_EGL_EVAL(disp, ret); } +static void* +_load_file(const char *path, uint64_t *size) +{ + FILE *in = fopen(path, "r"); + + if (!in) +return NULL; + + fseek(in, 0, SEEK_END); + long file_size = ftell(in); + rewind(in); + + printf("%s: %s, size is %ld\n", __func__, path, file_size); + + char *data = (char *) malloc (file_size); + + fread(data, file_size, 1, in); + fclose(in); + + *size = file_size; + return data; +} + +static void +_save_file(const char *path, const void *data, uint64_t size) +{ + printf("%s: %s, size is %ld\n", __func__, path, size); + + FILE *out = fopen(path, "w"); + + if (!out) { +fprintf(stderr, "failed to open file for writing:\npath (%s)\n", path); +return; + } + + fwrite(data, size, 1, out); + fclose(out); +} + +static void +mesa_set(const void* key, EGLsizeiANDROID keySize, const void* value, EGLsizeiANDROID valueSize) +{ + if (valueSize < 2000) +return; + + char tmp[256]; + snprintf(tmp, 256, "/tmp/%s", key); + + _save_file(tmp, value, valueSize); +} + +static EGLsizeiANDROID +mesa_get(const void* key, EGLsizeiANDROID keySize, void* value, 
EGLsizeiANDROID valueSize) +{ + uint64_t size; + + char tmp[256]; + snprintf(tmp, 256, "/tmp/%s", key); + + void *data = _load_file(tmp, ); + + if (!data) +return 0; + + if (size > valueSize) { +free(data); +return 0; + } + + memcpy(value, data, size); + free(data); + return size; +} + EGLBoolean EGLAPIENTRY eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, EGLSurface read, @@ -845,6 +919,9 @@ eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, EGLSurface read, ret = drv->API.MakeCurrent(drv, disp, draw_surf, read_surf, context); + /* Debug EGL_ANDROID_blob_cache */ + drv->API.SetBlobCacheFuncsANDROID(drv, disp, mesa_set, mesa_get); + RETURN_EGL_EVAL(disp, ret); } -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 08/10] egl: add support for EGL_ANDROID_blob_cache
Signed-off-by: Tapani Pälli--- src/egl/drivers/dri2/egl_dri2.c | 43 + src/egl/drivers/dri2/egl_dri2.h | 4 src/egl/main/eglapi.c | 29 +++ src/egl/main/eglapi.h | 4 src/egl/main/egldisplay.h | 3 +++ src/egl/main/eglentrypoint.h| 1 + 6 files changed, 84 insertions(+) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index d5a4f72e86..f9d0223fe2 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -458,6 +458,7 @@ static const struct dri2_extension_match optional_core_extensions[] = { { __DRI2_INTEROP, 1, offsetof(struct dri2_egl_display, interop) }, { __DRI_IMAGE, 1, offsetof(struct dri2_egl_display, image) }, { __DRI2_FLUSH_CONTROL, 1, offsetof(struct dri2_egl_display, flush_control) }, + { __DRI2_BLOB, 1, offsetof(struct dri2_egl_display, blob) }, { NULL, 0, 0 } }; @@ -727,6 +728,9 @@ dri2_setup_screen(_EGLDisplay *disp) } } + if (dri2_dpy->blob) + disp->Extensions.ANDROID_blob_cache = EGL_TRUE; + disp->Extensions.KHR_reusable_sync = EGL_TRUE; if (dri2_dpy->image) { @@ -1470,6 +1474,26 @@ dri2_surf_update_fence_fd(_EGLContext *ctx, dri2_surface_set_out_fence_fd(surf, fence_fd); } +static void +update_blob_cache_functions(struct dri2_egl_display *dri2_dpy, +struct dri2_egl_context *dri2_ctx) +{ + if (!dri2_dpy || !dri2_ctx) + return; + + /* No blob support. */ + if (!dri2_dpy->blob) + return; + + /* No functions to set. */ + if (!dri2_dpy->blob_cache_set) + return; + + dri2_dpy->blob->set_cache_funcs(dri2_ctx->dri_context, + dri2_dpy->blob_cache_set, + dri2_dpy->blob_cache_get); +} + /** * Called via eglMakeCurrent(), drv->API.MakeCurrent(). */ @@ -1499,6 +1523,9 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf, if (old_ctx) dri2_gl_flush(); + /* Make sure cache functions are set for new context. */ + update_blob_cache_functions(dri2_dpy, dri2_ctx); + ddraw = (dsurf) ? dri2_dpy->vtbl->get_dri_drawable(dsurf) : NULL; rdraw = (rsurf) ? 
dri2_dpy->vtbl->get_dri_drawable(rsurf) : NULL; cctx = (dri2_ctx) ? dri2_ctx->dri_context : NULL; @@ -3016,6 +3043,21 @@ dri2_dup_native_fence_fd(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync) return dup(sync->SyncFd); } +static void +dri2_set_blob_cache_funcs(_EGLDriver *drv, _EGLDisplay *dpy, + EGLSetBlobFuncANDROID set, + EGLGetBlobFuncANDROID get) +{ + _EGLContext *ctx = _eglGetCurrentContext(); + struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy); + struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); + + dri2_dpy->blob_cache_set = set; + dri2_dpy->blob_cache_get = get; + + update_blob_cache_functions(dri2_dpy, dri2_ctx); +} + static EGLint dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint flags, EGLTime timeout) @@ -3234,6 +3276,7 @@ _eglBuiltInDriver(void) dri2_drv->API.GLInteropQueryDeviceInfo = dri2_interop_query_device_info; dri2_drv->API.GLInteropExportObject = dri2_interop_export_object; dri2_drv->API.DupNativeFenceFDANDROID = dri2_dup_native_fence_fd; + dri2_drv->API.SetBlobCacheFuncsANDROID = dri2_set_blob_cache_funcs; dri2_drv->Name = "DRI2"; diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index cc76c73eab..a6777ad3f1 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -171,6 +171,7 @@ struct dri2_egl_display const __DRInoErrorExtension*no_error; const __DRI2configQueryExtension *config; const __DRI2fenceExtension *fence; + const __DRI2blobExtension *blob; const __DRI2rendererQueryExtension *rendererQuery; const __DRI2interopExtension *interop; int fd; @@ -230,6 +231,9 @@ struct dri2_egl_display bool is_render_node; bool is_different_gpu; + + EGLSetBlobFuncANDROID blob_cache_set; + EGLGetBlobFuncANDROID blob_cache_get; }; struct dri2_egl_context diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 5110688f2d..b8d64a913c 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -476,6 +476,7 @@ 
_eglCreateExtensionsString(_EGLDisplay *dpy) char *exts = dpy->ExtensionsString; /* Please keep these sorted alphabetically. */ + _EGL_CHECK_EXTENSION(ANDROID_blob_cache); _EGL_CHECK_EXTENSION(ANDROID_framebuffer_target); _EGL_CHECK_EXTENSION(ANDROID_image_native_buffer); _EGL_CHECK_EXTENSION(ANDROID_native_fence_sync); @@ -2522,6 +2523,34 @@ eglQueryDmaBufModifiersEXT(EGLDisplay dpy, EGLint format, EGLint max_modifiers, RETURN_EGL_EVAL(disp,
[Mesa-dev] [RFC 07/10] mesa: add program blob cache functionality
Cache set and get are called in similar fashion as what is happening with disk cache. Functionality requires ARB_get_program_binary and EGL_ANDROID_blob_cache support. Signed-off-by: Tapani Pälli--- src/mesa/Makefile.sources | 2 + src/mesa/main/program_blob_cache.c | 141 + src/mesa/main/program_blob_cache.h | 48 + src/mesa/meson.build | 2 + src/mesa/program/ir_to_mesa.cpp| 9 ++- 5 files changed, 201 insertions(+), 1 deletion(-) create mode 100644 src/mesa/main/program_blob_cache.c create mode 100644 src/mesa/main/program_blob_cache.h diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 53fa486364..bbcfdb425e 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -177,6 +177,8 @@ MAIN_FILES = \ main/polygon.h \ main/program_binary.c \ main/program_binary.h \ + main/program_blob_cache.c \ + main/program_blob_cache.h \ main/program_resource.c \ main/program_resource.h \ main/querymatrix.c \ diff --git a/src/mesa/main/program_blob_cache.c b/src/mesa/main/program_blob_cache.c new file mode 100644 index 00..0b3ea1a549 --- /dev/null +++ b/src/mesa/main/program_blob_cache.c @@ -0,0 +1,141 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2018 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "main/errors.h" +#include "main/mtypes.h" +#include "main/shaderobj.h" +#include "main/program_binary.h" +#include "util/mesa-sha1.h" +#include "compiler/glsl/program.h" + +#include "program_blob_cache.h" + +/* This is what Android EGL defines as the maxValueSize in egl_cache_t + * class implementation. + */ +#define MAX_BLOB_SIZE 64 * 1024 + +static void +generate_sha1_string(struct gl_context *ctx, struct gl_shader_program *shProg, + char *key) +{ + char *buf = create_shader_program_keystr(ctx, shProg); + struct mesa_sha1 sha_ctx; + unsigned char sha1str[20]; + + /* Add driver sha1 to the key string. */ + uint8_t driver_sha1[20]; + char driver_sha1buf[41]; + + ctx->Driver.GetProgramBinaryDriverSHA1(ctx, driver_sha1); + _mesa_sha1_format(driver_sha1buf, driver_sha1); + ralloc_asprintf_append(, "%s", driver_sha1buf); + + _mesa_sha1_init(_ctx); + _mesa_sha1_update(_ctx, buf, strlen(buf)); + _mesa_sha1_final(_ctx, sha1str); + _mesa_sha1_format(key, sha1str); + + ralloc_free(buf); +} + +void +_mesa_blob_cache_set(struct gl_context *ctx, + struct gl_shader_program *shProg) +{ + assert(shProg->data->LinkStatus == linking_success); + + /* ARB_get_program_binary support required. */ + if (!ctx->blobCacheSet || !ctx->Driver.GetProgramBinaryDriverSHA1) + return; + + /* Skip cache for fixed-function programs and programs that use +* transform feedback. 
+*/ + if (!shProg->Name || shProg->TransformFeedback.NumVarying > 0) + return; + + GLint length; + _mesa_get_program_binary_length(ctx, shProg, ); + + /* Skip cache if exceeds max blob size. */ + if (length > MAX_BLOB_SIZE) + return; + + char *blob = (char *) malloc (length); + + if (!blob) + return; + + GLsizei real_len; + GLenum format; + _mesa_get_program_binary(ctx, shProg, length, _len, +, blob); + + assert(format == GL_PROGRAM_BINARY_FORMAT_MESA); + + char key[41]; + generate_sha1_string(ctx, shProg, key); + + ctx->blobCacheSet(key, 41, blob, real_len); + free(blob); +} + +void +_mesa_blob_cache_get(struct gl_context *ctx, + struct gl_shader_program *shProg) +{ + /* ARB_get_program_binary support required. */ + if (!ctx->blobCacheGet || !ctx->Driver.GetProgramBinaryDriverSHA1) + return; + + void *blob = malloc(MAX_BLOB_SIZE); + + if (!blob) + return; + + char key[41];
[Mesa-dev] [RFC 09/10] i965: add __DRI2_BLOB support and set cache functions
Signed-off-by: Tapani Pälli--- src/mesa/drivers/dri/i965/intel_screen.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 3e016b5b7a..01752700cf 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1483,6 +1483,17 @@ brw_query_renderer_string(__DRIscreen *dri_screen, return -1; } +static void +brw_set_cache_funcs(__DRIcontext *dri_ctx, +__DRIblobCacheSet set, __DRIblobCacheGet get) +{ + struct brw_context *brw = dri_ctx->driverPrivate; + struct gl_context *ctx = >ctx; + + ctx->blobCacheSet = set; + ctx->blobCacheGet = get; +} + static const __DRI2rendererQueryExtension intelRendererQueryExtension = { .base = { __DRI2_RENDERER_QUERY, 1 }, @@ -1494,6 +1505,11 @@ static const __DRIrobustnessExtension dri2Robustness = { .base = { __DRI2_ROBUSTNESS, 1 } }; +static const __DRI2blobExtension intelBlobExtension = { + .base = { __DRI2_BLOB, 1 }, + .set_cache_funcs = brw_set_cache_funcs +}; + static const __DRIextension *screenExtensions[] = { , , @@ -1503,6 +1519,7 @@ static const __DRIextension *screenExtensions[] = { , , , +, NULL }; @@ -1516,6 +1533,7 @@ static const __DRIextension *intelRobustScreenExtensions[] = { , , , +, NULL }; -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 02/10] android: disable disk cache on Android
The original reason this was enabled was to get the sha1 functionality, which used to be guarded by the shader cache define. Instead of the disk cache, Android will utilize EGL_ANDROID_blob_cache for caching. Signed-off-by: Tapani Pälli--- Android.common.mk | 1 - 1 file changed, 1 deletion(-) diff --git a/Android.common.mk b/Android.common.mk index d9f871c08c..6fd90f8895 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -46,7 +46,6 @@ LOCAL_CFLAGS += \ # here to fix the radeonsi build. LOCAL_CFLAGS += \ -DANDROID_API_LEVEL=$(PLATFORM_SDK_VERSION) \ - -DENABLE_SHADER_CACHE \ -D__STDC_CONSTANT_MACROS \ -D__STDC_LIMIT_MACROS \ -DHAVE___BUILTIN_EXPECT \ -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 00/10] EGL_ANDROID_blob_cache RFC
Hi; This series enables the EGL_ANDROID_blob_cache extension. The overall approach is to skip linking of shader programs by utilizing the functionality of the ARB_get_program_binary extension. The first 2 patches are separate and could be pushed in, the rest is RFC, and the last patch is an additional debug patch that makes Mesa use this cache for all EGL apps. With the last patch I've verified functionality on desktop using the following apps: glmark2-es, Synmark, glbenchmark2.7.0 (could not make new gfxbench versions work with EGL ..) and a 3rd party OpenGL ES 2.0 game engine that has relatively complex shaders. *BUT* there are big issues when using this on Android. The main issue is that textures are missing from some of the hwui (library used to render android ui elements) programs. I'm continuing to debug this but wanted to share the approach and see if someone might spot issues with the implementation. Any comments appreciated, thanks! Tapani Pälli (10): glsl: cleanup shader_cache header guard android: disable disk cache on Android dri: add interface for EGL_ANDROID_blob_cache extension glsl: add function to create unique string from shader program mesa: add set and get functions to gl_context glsl: create shader sha1 if disk cache or blob cache is active mesa: add program blob cache functionality egl: add support for EGL_ANDROID_blob_cache i965: add __DRI2_BLOB support and set cache functions egl: XXX debug framework for EGL_ANDROID_blob_cache Android.common.mk| 1 - include/GL/internal/dri_interface.h | 26 +- src/compiler/Makefile.sources| 1 + src/compiler/glsl/glsl_parser_extras.cpp | 26 +++--- src/compiler/glsl/meson.build| 1 + src/compiler/glsl/program.cpp| 88 +++ src/compiler/glsl/program.h | 12 ++- src/compiler/glsl/shader_cache.cpp | 55 +--- src/compiler/glsl/shader_cache.h | 6 +- src/egl/drivers/dri2/egl_dri2.c | 40 + src/egl/drivers/dri2/egl_dri2.h | 4 + src/egl/main/eglapi.c| 106 +++ src/egl/main/eglapi.h| 4 + src/egl/main/egldisplay.h| 3 + src/egl/main/eglentrypoint.h | 1 + 
src/mesa/Makefile.sources| 2 + src/mesa/drivers/dri/i965/intel_screen.c | 18 src/mesa/main/mtypes.h | 16 src/mesa/main/program_blob_cache.c | 141 +++ src/mesa/main/program_blob_cache.h | 48 +++ src/mesa/meson.build | 2 + src/mesa/program/ir_to_mesa.cpp | 9 +- src/util/disk_cache.c| 5 +- 23 files changed, 538 insertions(+), 77 deletions(-) create mode 100644 src/compiler/glsl/program.cpp create mode 100644 src/mesa/main/program_blob_cache.c create mode 100644 src/mesa/main/program_blob_cache.h -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 04/10] glsl: add function to create unique string from shader program
Patch moves existing functionality from shader_cache_read_program_metadata to a helper function. Signed-off-by: Tapani Pälli--- src/compiler/Makefile.sources | 1 + src/compiler/glsl/meson.build | 1 + src/compiler/glsl/program.cpp | 88 ++ src/compiler/glsl/program.h| 12 -- src/compiler/glsl/shader_cache.cpp | 55 +--- 5 files changed, 100 insertions(+), 57 deletions(-) create mode 100644 src/compiler/glsl/program.cpp diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index d3f746f5f9..e3dfacf98c 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -136,6 +136,7 @@ LIBGLSL_FILES = \ glsl/opt_swizzle.cpp \ glsl/opt_tree_grafting.cpp \ glsl/opt_vectorize.cpp \ + glsl/program.cpp \ glsl/program.h \ glsl/propagate_invariance.cpp \ glsl/s_expression.cpp \ diff --git a/src/compiler/glsl/meson.build b/src/compiler/glsl/meson.build index 6aaa9bab05..5d8d8d4530 100644 --- a/src/compiler/glsl/meson.build +++ b/src/compiler/glsl/meson.build @@ -175,6 +175,7 @@ files_libglsl = files( 'opt_swizzle.cpp', 'opt_tree_grafting.cpp', 'opt_vectorize.cpp', + 'program.cpp', 'program.h', 'propagate_invariance.cpp', 's_expression.cpp', diff --git a/src/compiler/glsl/program.cpp b/src/compiler/glsl/program.cpp new file mode 100644 index 00..df7124485d --- /dev/null +++ b/src/compiler/glsl/program.cpp @@ -0,0 +1,88 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or 
substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "main/mtypes.h" +#include "util/mesa-sha1.h" +#include "string_to_uint_map.h" +#include "program.h" + +static void +create_binding_str(const char *key, unsigned value, void *closure) +{ + char **bindings_str = (char **) closure; + ralloc_asprintf_append(bindings_str, "%s:%u,", key, value); +} + +extern "C" char * +create_shader_program_keystr(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + /* Include bindings when creating sha1. These bindings change the resulting +* binary so they are just as important as the shader source. +*/ + char *buf = ralloc_strdup(NULL, "vb: "); + prog->AttributeBindings->iterate(create_binding_str, ); + ralloc_strcat(, "fb: "); + prog->FragDataBindings->iterate(create_binding_str, ); + ralloc_strcat(, "fbi: "); + prog->FragDataIndexBindings->iterate(create_binding_str, ); + + /* SSO has an effect on the linked program so include this when generating +* the sha also. +*/ + ralloc_asprintf_append(, "sso: %s\n", + prog->SeparateShader ? "T" : "F"); + + /* A shader might end up producing different output depending on the glsl +* version supported by the compiler. For example a different path might be +* taken by the preprocessor, so add the version to the hash input. 
+*/ + ralloc_asprintf_append(, "api: %d glsl: %d fglsl: %d\n", + ctx->API, ctx->Const.GLSLVersion, + ctx->Const.ForceGLSLVersion); + + /* We run the preprocessor on shaders after hashing them, so we need to +* add any extension override vars to the hash. If we don't do this the +* preprocessor could result in different output and we could load the +* wrong shader. +*/ + char *ext_override = getenv("MESA_EXTENSION_OVERRIDE"); + if (ext_override) { + ralloc_asprintf_append(, "ext:%s", ext_override); + } + /* DRI config options may also change the output from the compiler so +* include them as an input to sha1 creation. +*/ + char sha1buf[41]; + _mesa_sha1_format(sha1buf, ctx->Const.dri_config_options_sha1); + ralloc_strcat(, sha1buf); + + for (unsigned i = 0; i < prog->NumShaders; i++) { + struct
[Mesa-dev] [RFC 03/10] dri: add interface for EGL_ANDROID_blob_cache extension
Signed-off-by: Tapani Pälli--- include/GL/internal/dri_interface.h | 26 +- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index 34a5c9fb01..de367d8f77 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -82,7 +82,7 @@ typedef struct __DRI2flushExtensionRec __DRI2flushExtension; typedef struct __DRI2throttleExtensionRec __DRI2throttleExtension; typedef struct __DRI2fenceExtensionRec __DRI2fenceExtension; typedef struct __DRI2interopExtensionRec __DRI2interopExtension; - +typedef struct __DRI2blobExtensionRec __DRI2blobExtension; typedef struct __DRIimageLoaderExtensionRec __DRIimageLoaderExtension; typedef struct __DRIimageDriverExtensionRec __DRIimageDriverExtension; @@ -336,6 +336,30 @@ struct __DRI2throttleExtensionRec { enum __DRI2throttleReason reason); }; +/** + * Extension for EGL_ANDROID_blob_cache + */ + +#define __DRI2_BLOB "DRI2_Blob" +#define __DRI2_BLOB_VERSION 1 + +typedef void +(*__DRIblobCacheSet) (const void *key, signed long keySize, + const void *value, signed long valueSize); + +typedef signed long +(*__DRIblobCacheGet) (const void *key, signed long keySize, + void *value, signed long valueSize); + +struct __DRI2blobExtensionRec { + __DRIextension base; + + /** +* Set cache functions for setting and getting cache entries. +*/ + void (*set_cache_funcs) (__DRIcontext *ctx, +__DRIblobCacheSet set, __DRIblobCacheGet get); +}; /** * Extension for fences / synchronization objects. -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 05/10] mesa: add set and get functions to gl_context
These will be utilized by EGL_ANDROID_blob_cache extension. Signed-off-by: Tapani Pälli--- src/mesa/main/mtypes.h | 16 1 file changed, 16 insertions(+) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index a4f1e78f41..073b6c8536 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -4702,6 +4702,14 @@ struct gl_memory_object GLboolean Dedicated;/**< import memory from a dedicated allocation */ }; +typedef void +(*mesa_blob_cache_set_cb) (const void *key, signed long keySize, + const void *value, signed long valueSize); + +typedef signed long +(*mesa_blob_cache_get_cb) (const void *key, signed long keySize, + void *value, signed long valueSize); + /** * Mesa rendering context. * @@ -5074,6 +5082,14 @@ struct gl_context struct hash_table_u64 *ResidentTextureHandles; struct hash_table_u64 *ResidentImageHandles; /*@}*/ + + /** +* \name EGL_ANDROID_blob_cache +*/ + /*@{*/ + mesa_blob_cache_set_cb blobCacheSet; + mesa_blob_cache_get_cb blobCacheGet; + /*@}*/ }; /** -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC 01/10] glsl: cleanup shader_cache header guard
Signed-off-by: Tapani Pälli--- src/compiler/glsl/shader_cache.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/compiler/glsl/shader_cache.h b/src/compiler/glsl/shader_cache.h index c0598f37e8..eeefb3aeb2 100644 --- a/src/compiler/glsl/shader_cache.h +++ b/src/compiler/glsl/shader_cache.h @@ -21,8 +21,8 @@ * DEALINGS IN THE SOFTWARE. */ -#ifndef SHADER_CACHE -#define SHADER_CACHE +#ifndef SHADER_CACHE_H +#define SHADER_CACHE_H #include "util/disk_cache.h" @@ -34,4 +34,4 @@ bool shader_cache_read_program_metadata(struct gl_context *ctx, struct gl_shader_program *prog); -#endif /* GLSL_SYMBOL_TABLE */ +#endif /* SHADER_CACHE_H */ -- 2.14.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC PATCH 0/6] r600: speed up tesselation shaders
On 8 January 2018 at 17:12, Dave Airliewrote: > On 6 January 2018 at 03:41, Gert Wollny wrote: >> Am Freitag, den 05.01.2018, 18:18 +0100 schrieb Gert Wollny: >>> >>> Well, I have tested some piglits now and the behaviour is quite >>> wired: >>> >>> When I run nop as the very first piglit after booting the machine it >>> works. After running other piglits (specifically tcs-input-read- >>> array-interface and tcs-input-read-mat), nop starts to fail, also >>> without sb. >>> >>> Restarting X is not enough to get nop to pass again. >>> >>> If I run piglit normally on the shader subset, I also get lockups and >>> I even got kicked out of X, the last syslog message related to this >>> was: >>> >>> [ 1403.211887] [drm:r600_ib_test [radeon]] *ERROR* radeon: fence wait >>> timed out. >>> [ 1403.211932] [drm:radeon_ib_ring_tests [radeon]] *ERROR* radeon: >>> failed testing IB on GFX ring (-110). >>> >> >> When I run Unigine_Heaven with your WIP code and all sb passes for >> tesselation enabled, I get a crash because of a stack overflow, i.e. >> the hash evaluation ends up in an infinite recursion doing a ping-pong >> between two nodes: >> >> ... >> #747 in r600_sb::node::hash (this=0x1e01228) at sb/sb_ir.cpp:277 >> #748 in r600_sb::value::hash (this=0x1e39cd0) at sb/sb_valtable.cpp:189 >> #749 in r600_sb::value::hash (this=< >) at sb/sb_valtable.cpp:184 >> #750 in r600_sb::node::hash_src (this=this@entry= ) at sb/sb_ir.cpp:265 >> #751 in r600_sb::node::hash (this=0x1e00bf0) at sb/sb_ir.cpp:277 >> #752 in r600_sb::value::hash (this=0x1e39e70) at sb/sb_valtable.cpp:189 >> #753 in r600_sb::value::hash (this=< >) at sb/sb_valtable.cpp:184 >> #754 in r600_sb::node::hash_src (this=this@entry= ) at sb/sb_ir.cpp:265 >> #755 in r600_sb::node::hash (this=0x1e01228) at sb/sb_ir.cpp:277 > > Yeah I see the same. Not 100% sure why yet. 
> > For nop.shader_test I've noticed if you move the position line above the > tess factor emission things start to work, which is confusing me no end, > it sounds like we're doing something bad with LDS still. I've pushed out another few hacks in progress. I've got heaven running now, and seem to get about the same speedup you were getting with this series. On piglit -t tessellation I've got about 13 crashes in some variable indexing tests, and they are all GCM related. I've torn out a fair bit of hair this afternoon trying to keep the gcm scheduler happy but to no avail. tests/spec/arb_tessellation_shader/execution/variable-indexing/tcs-input-array-float-index-rd.shader_test is one of the culprits, it looks like GCM schedules a bunch of basic blocks, but then some instructions are dont_move but get scheduled wrong. Dave. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] glsl: remove LowerTCSPatchVerticesIn
mmm... you're right, I'l add that too. Thanks! On Mon, 2018-01-08 at 08:23 -0800, Jason Ekstrand wrote: > This looks good to me. However, I think we'll need to do the same > thing for LowerTESPatchVerticesIn and they can probably be handled by > the same pass. > > On Mon, Jan 8, 2018 at 5:13 AM, Iago Toral Quiroga> wrote: > > Intel was the only user, now it does this in NIR. > > > > --- > > > > src/compiler/glsl/builtin_variables.cpp | 13 + > > > > src/mesa/drivers/dri/i965/brw_context.c | 1 - > > > > src/mesa/main/mtypes.h | 1 - > > > > 3 files changed, 1 insertion(+), 14 deletions(-) > > > > > > > > diff --git a/src/compiler/glsl/builtin_variables.cpp > > b/src/compiler/glsl/builtin_variables.cpp > > > > index a885f32875..a686cb6a45 100644 > > > > --- a/src/compiler/glsl/builtin_variables.cpp > > > > +++ b/src/compiler/glsl/builtin_variables.cpp > > > > @@ -38,11 +38,6 @@ static const struct gl_builtin_uniform_element > > gl_NumSamples_elements[] = { > > > > {NULL, {STATE_NUM_SAMPLES, 0, 0}, SWIZZLE_} > > > > }; > > > > > > > > -/* only for TCS */ > > > > -static const struct gl_builtin_uniform_element > > gl_PatchVerticesIn_elements[] = { > > > > - {NULL, {STATE_INTERNAL, STATE_TCS_PATCH_VERTICES_IN}, > > SWIZZLE_} > > > > -}; > > > > - > > > > static const struct gl_builtin_uniform_element > > gl_DepthRange_elements[] = { > > > > {"near", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_}, > > > > {"far", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_}, > > > > @@ -240,7 +235,6 @@ static const struct gl_builtin_uniform_element > > gl_NormalMatrix_elements[] = { > > > > #define STATEVAR(name) {#name, name ## _elements, ARRAY_SIZE(name > > ## _elements)} > > > > > > > > static const struct gl_builtin_uniform_desc > > _mesa_builtin_uniform_desc[] = { > > > > - STATEVAR(gl_PatchVerticesIn), > > > > STATEVAR(gl_NumSamples), > > > > STATEVAR(gl_DepthRange), > > > > STATEVAR(gl_ClipPlane), > > > > @@ -1067,12 +1061,7 @@ > > builtin_variable_generator::generate_tcs_special_vars() > > > > { > > 
> > add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, > > "gl_PrimitiveID"); > > > > add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, > > "gl_InvocationID"); > > > > - > > > > - if (state->ctx->Const.LowerTCSPatchVerticesIn) { > > > > - add_uniform(int_t, "gl_PatchVerticesIn"); > > > > - } else { > > > > - add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, > > "gl_PatchVerticesIn"); > > > > - } > > > > + add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, > > "gl_PatchVerticesIn"); > > > > > > > > add_output(VARYING_SLOT_TESS_LEVEL_OUTER, array(float_t, 4), > > > > "gl_TessLevelOuter")->data.patch = 1; > > > > diff --git a/src/mesa/drivers/dri/i965/brw_context.c > > b/src/mesa/drivers/dri/i965/brw_context.c > > > > index b555f7bfdf..e44c2b1f34 100644 > > > > --- a/src/mesa/drivers/dri/i965/brw_context.c > > > > +++ b/src/mesa/drivers/dri/i965/brw_context.c > > > > @@ -537,7 +537,6 @@ brw_initialize_context_constants(struct > > brw_context *brw) > > > > ctx->Const.MaxClipPlanes = 8; > > > > > > > > ctx->Const.GLSLTessLevelsAsInputs = true; > > > > - ctx->Const.LowerTCSPatchVerticesIn = devinfo->gen >= 8; > > > > ctx->Const.LowerTESPatchVerticesIn = true; > > > > ctx->Const.PrimitiveRestartForPatches = true; > > > > > > > > diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h > > > > index a4f1e78f41..fce456b2b4 100644 > > > > --- a/src/mesa/main/mtypes.h > > > > +++ b/src/mesa/main/mtypes.h > > > > @@ -4020,7 +4020,6 @@ struct gl_constants > > > > GLuint MaxTessPatchComponents; > > > > GLuint MaxTessControlTotalOutputComponents; > > > > bool LowerTessLevel; /**< Lower gl_TessLevel* from float[n] to > > vecn? 
*/ > > > > - bool LowerTCSPatchVerticesIn; /**< Lower gl_PatchVerticesIn to > > a uniform */ > > > > bool LowerTESPatchVerticesIn; /**< Lower gl_PatchVerticesIn to > > a uniform */ > > > > bool PrimitiveRestartForPatches; > > > > bool LowerCsDerivedVariables; /**< Lower > > gl_GlobalInvocationID and > > > > -- > > > > 2.11.0 > > > > > > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] dri_util: remove ALLOW_RGB10_CONFIGS option
On 01/08/2018 06:59 PM, Marek Olšák wrote: From: Marek OlšákThis is unused because it's for libGL/libEGL, not drivers. IMO this is not enough, you would need to also remove the query for this as well (driQueryOptionb in intel_screen.c), otherwise we hit assertion. Maybe just set bool allow_rgb10_configs = false for now? --- src/mesa/drivers/dri/common/dri_util.c | 4 1 file changed, 4 deletions(-) diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index d4fba0b..e6a7d23 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -48,24 +48,20 @@ #include "main/version.h" #include "main/debug_output.h" #include "main/errors.h" #include "main/macros.h" const char __dri2ConfigOptions[] = DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_1) DRI_CONF_SECTION_END - - DRI_CONF_SECTION_MISCELLANEOUS - DRI_CONF_ALLOW_RGB10_CONFIGS("true") - DRI_CONF_SECTION_END DRI_CONF_END; /*/ /** \name Screen handling functions */ /*/ /*@{*/ static void setupLoaderExtensions(__DRIscreen *psp, const __DRIextension **extensions) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] tgsi: include struct definitions for tgsi_build declarations
On Mon, Jan 8, 2018 at 7:25 PM, Ilia Mirkinwrote: > On Mon, Jan 8, 2018 at 4:18 PM, Rob Herring wrote: >> Many of the functions declared in tgsi_build.h return structs (not struct >> pointers). Therefore the full struct definitions are needed to avoid >> warnings or errors: >> >> In file included from >> src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp:23: >> external/mesa3d/src/gallium/auxiliary/tgsi/tgsi_build.h:47:1: error: >> 'tgsi_build_header' has C-linkage specified, but returns incomplete type >> 'struct tgsi_header' which could be incompatible with C >> [-Werror,-Wreturn-type-c-linkage] > > Ah, looks like this is real, and a result of my recent bindless image > changes. When your bot emailed me I assumed it was fake, as it had > sent previous false positives. Sorry about that. Yeah, it's not as stable as I'd like... > Arguably the tgsi_parse.h should be included in from_tgsi.cpp - not > sure, since I also make use of struct tgsi_full_src_register. But > files including what they need is good too. I'm open to either > solution. Following the latter, it probably should be included in both then. >> This error shows up on Android builds using clang and -Werror. >> >> Cc: Ilia Mirkin >> Signed-off-by: Rob Herring >> --- >> src/gallium/auxiliary/tgsi/tgsi_build.h | 6 +- >> 1 file changed, 1 insertion(+), 5 deletions(-) >> >> diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h >> b/src/gallium/auxiliary/tgsi/tgsi_build.h >> index 53f31932c021..053292957211 100644 >> --- a/src/gallium/auxiliary/tgsi/tgsi_build.h >> +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h >> @@ -28,11 +28,7 @@ >> #ifndef TGSI_BUILD_H >> #define TGSI_BUILD_H >> >> - >> -struct tgsi_token; >> -struct tgsi_full_dst_register; >> -struct tgsi_full_src_register; >> - >> +#include > > We use "tgsi/tgsi_parse.h" everywhere. Okay. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] r600: fix enabled_rb_mask on eg/cm
From: Roland Scheidegger For eg/cm, the r600_gb_backend_map will always be 0. I assume this is a bug in the drm kernel driver, as it just never fills the information in. I am not entirely sure if the map is supposed to be needed for these chips, since unlike on r600/r700 the value calculated for the map is in fact written to the GB_BACKEND_MAP reg, for which I am unable to dig up any documentation. In any case, this causes r600_query_hw_prepare_buffer to write the "status bit" (just the highest bit of the occlusion query result) even for active rbes (all but the first). This doesn't make much sense, albeit I suppose it's mostly safe. According to the commit history, it's necessary to set these bits for inactive rbes since otherwise predication will lock up - presumably the hw is just waiting for the status bit to appear, which will never happen with inactive rbes. I'd guess potentially predication could be wrong (due to not waiting for the actual result if the status bit is already there) if this is set for active rbes. Discovered while trying to fix predication lockups on Juniper (needs another patch). --- src/gallium/drivers/r600/r600_query.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 987da9a806..699404b10d 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -1834,8 +1834,14 @@ void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen) assert(rscreen->chip_class <= CAYMAN); - /* if backend_map query is supported by the kernel */ - if (rscreen->info.r600_gb_backend_map_valid) { + /* +* if backend_map query is supported by the kernel. +* Note the kernel drm driver (as of now) never fills in the associated +* data on eg/cm, only r600/r700, hence ignore the valid bit there. +* (Albeit some chips with just one active rb can have a valid 0 map.) 
+*/ + if (rscreen->info.r600_gb_backend_map_valid && + (ctx->chip_class < EVERGREEN || rscreen->info.r600_gb_backend_map != 0)) { unsigned num_tile_pipes = rscreen->info.num_tile_pipes; unsigned backend_map = rscreen->info.r600_gb_backend_map; unsigned item_width, item_mask; -- 2.12.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] r600: hack up num_render_backends on Juniper to 8
From: Roland Scheidegger Juniper really has a maximum of 4 RBEs (16 pixels). However, predication always locks up on my HD 5750, and through experiments it looks like if we're pretending it has a maximum of 8, with 4 disabled, it works correctly. My conclusion would be that there's a bug (likely firmware, not hw) which causes the predication logic to try to read 8 results out of the query buffer instead of just 4, and since of course no one ever writes the upper 4, the status bit is never set and hence it will wait for it forever. Ideally this would be fixed in firmware, but I'd guess chances of that happening are slim. This will double the size of (occlusion) query result buffers, write the status bit for the disabled rbs in these buffers, and will also add 8 results together instead of just 4 when reading them back. The latter is unnecessary, but it's probably not worth bothering - luckily num_render_backends isn't used outside of occlusion queries, so we don't need a separate value for the "real" maximum. Also print out the enabled_rb_mask if it changed from the pre-fixed value (which is already printed out), just in case there are some more problems with chips which have some rbs disabled... This fixes all the lockups with piglit nv_conditional_render tests on my HD 5750 (all pass). 
--- src/gallium/drivers/r600/r600_query.c | 21 +++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 699404b10d..6fc00819b1 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -1830,7 +1830,19 @@ void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen) struct r600_resource *buffer; uint32_t *results; unsigned i, mask = 0; - unsigned max_rbs = ctx->screen->info.num_render_backends; + unsigned max_rbs; + + if (ctx->family == CHIP_JUNIPER) { + /* +* Fix for predication lockups - the chip can only ever have +* 4 RBs, however it looks like the predication logic assumes +* there's 8, trying to read results from query buffers never +* written to. By increasing this number we'll write the +* status bit for these as per the normal disabled rb logic. +*/ + ctx->screen->info.num_render_backends = 8; + } + max_rbs = ctx->screen->info.num_render_backends; assert(rscreen->chip_class <= CAYMAN); @@ -1901,8 +1913,13 @@ void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen) r600_resource_reference(, NULL); - if (mask) + if (mask) { + if (rscreen->debug_flags & DBG_INFO && + mask != rscreen->info.enabled_rb_mask) { + printf("enabled_rb_mask (fixed) = 0x%x\n", mask); + } rscreen->info.enabled_rb_mask = mask; + } } #define XFULL(name_, query_type_, type_, result_type_, group_id_) \ -- 2.12.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] winsys/radeon: fix up default enabled_rb_mask for r600
From: Roland ScheideggerThe logic had two fatal flaws which completely killed the default value. 1) drm will overwrite the value anyway even if the chip can't be handled 2) the default value logic is relying on num_render_backends, which was filled in later. Luckily noone is relying on it, but it's a bit confusing seeing the chip clock printed out there (as hex) with R600_DEBUG=info... (Albeit radeonsi does not appear to fix up the value. If kernels which don't handle this query are still supported, radeonsi will still end up with a broken enabled_rb_mask, I have no idea of the potential results of this there.) --- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index e600199d26..10f2ecc900 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -369,12 +369,6 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) >info.max_shader_clock); ws->info.max_shader_clock /= 1000; -/* Default value. */ -ws->info.enabled_rb_mask = u_bit_consecutive(0, ws->info.num_render_backends); -/* This fails on non-GCN or older kernels: */ -radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL, - >info.enabled_rb_mask); - ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); /* Generation-specific queries. */ @@ -433,6 +427,16 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) >info.r600_gb_backend_map)) ws->info.r600_gb_backend_map_valid = true; +/* Default value. */ +ws->info.enabled_rb_mask = u_bit_consecutive(0, ws->info.num_render_backends); +/* + * This fails (silently) on non-GCN or older kernels, overwriting the + * default enabled_rb_mask with the result of the last query. 
+*/ +if (ws->gen >= DRV_SI) +radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL, + >info.enabled_rb_mask); + ws->info.has_virtual_memory = false; if (ws->info.drm_minor >= 13) { uint32_t ib_vm_max_size; -- 2.12.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 104214] Dota crashes when switching from game to desktop
https://bugs.freedesktop.org/show_bug.cgi?id=104214 --- Comment #24 from Sven --- I'm sorry but multiple issues seem to be discussed here: - dota crashing on startup (seems to be fixed) - dota crashing on alt+tab (not yet fixed) - mpv crashing (not yet fixed) The alt+tab crash I'm seeing seems to stem from a bug or problem in intel_miptree_create_for_dri_image. I posted the stack trace above. (In reply to Sven from comment #19) > It sounds like bug 104392. I must retract that comment. The backtrace posted there is significantly different. (In reply to Evangelos Foutras from comment #18) > Could this be related to bug 104342 (and the most likely duplicate bugs I've > added to its "See Also" bugs)? The backtrace here looks a bit different but > the brw_clear() call is common (and a few other calls further up). I'm not sure whether this is a duplicate of bug 104342. But yes, my backtrace also contains brw_clear(). Is there some theory on what's going on or a patch I could test? -- You are receiving this mail because: You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] tgsi: include struct definitions for tgsi_build declarations
On Mon, Jan 8, 2018 at 4:18 PM, Rob Herringwrote: > Many of the functions declared in tgsi_build.h return structs (not struct > pointers). Therefore the full struct definitions are needed to avoid > warnings or errors: > > In file included from > src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp:23: > external/mesa3d/src/gallium/auxiliary/tgsi/tgsi_build.h:47:1: error: > 'tgsi_build_header' has C-linkage specified, but returns incomplete type > 'struct tgsi_header' which could be incompatible with C > [-Werror,-Wreturn-type-c-linkage] Ah, looks like this is real, and a result of my recent bindless image changes. When your bot emailed me I assumed it was fake, as it had sent previous false positives. Sorry about that. Arguably the tgsi_parse.h should be included in from_tgsi.cpp - not sure, since I also make use of struct tgsi_full_src_register. But files including what they need is good too. I'm open to either solution. > > This error shows up on Android builds using clang and -Werror. > > Cc: Ilia Mirkin > Signed-off-by: Rob Herring > --- > src/gallium/auxiliary/tgsi/tgsi_build.h | 6 +- > 1 file changed, 1 insertion(+), 5 deletions(-) > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h > b/src/gallium/auxiliary/tgsi/tgsi_build.h > index 53f31932c021..053292957211 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_build.h > +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h > @@ -28,11 +28,7 @@ > #ifndef TGSI_BUILD_H > #define TGSI_BUILD_H > > - > -struct tgsi_token; > -struct tgsi_full_dst_register; > -struct tgsi_full_src_register; > - > +#include We use "tgsi/tgsi_parse.h" everywhere. > > #if defined __cplusplus > extern "C" { > -- > 2.14.1 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] Revert "Revert "i965/fs: Use align1 mode on ternary instructions on Gen10+""
Matt Turner writes: > This reverts commit 2d0457203871c843ebfc90fb895b65a9b14cd9bb. Reviewed-by: Scott D Phillips ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965/fs: Add/use functions to convert to 3src_align1 vstride/hstride
Matt Turnerwrites: > Some cases weren't handled, such as stride 4 which is needed for 64-bit > operations. Presumably fixes the assertion failure mentioned in commit > 2d0457203871 (Revert "i965/fs: Use align1 mode on ternary instructions > on Gen10+") but who can really say since the commit neglected to list > any of them! > --- > src/intel/compiler/brw_eu_emit.c | 69 > > 1 file changed, 41 insertions(+), 28 deletions(-) > > diff --git a/src/intel/compiler/brw_eu_emit.c > b/src/intel/compiler/brw_eu_emit.c > index 85bb6a4cdd..c25d8d6eda 100644 > --- a/src/intel/compiler/brw_eu_emit.c > +++ b/src/intel/compiler/brw_eu_emit.c > @@ -673,6 +673,42 @@ get_3src_subreg_nr(struct brw_reg reg) > return reg.subnr / 4; > } > > +static enum gen10_align1_3src_vertical_stride > +to_3src_align1_vstride(enum brw_vertical_stride vstride) > +{ > + switch (vstride) { > + case BRW_VERTICAL_STRIDE_0: > + return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0; > + case BRW_VERTICAL_STRIDE_2: > + return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2; > + case BRW_VERTICAL_STRIDE_4: > + return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4; > + case BRW_VERTICAL_STRIDE_8: > + case BRW_VERTICAL_STRIDE_16: > + return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8; What is the reasoning for vstride 16 to map to 8 here? Could that cause problems? 
> + default: > + unreachable("invalid vstride"); > + } > +} > + > + > +static enum gen10_align1_3src_src_horizontal_stride > +to_3src_align1_hstride(enum brw_horizontal_stride hstride) > +{ > + switch (hstride) { > + case BRW_HORIZONTAL_STRIDE_0: > + return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0; > + case BRW_HORIZONTAL_STRIDE_1: > + return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1; > + case BRW_HORIZONTAL_STRIDE_2: > + return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2; > + case BRW_HORIZONTAL_STRIDE_4: > + return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4; > + default: > + unreachable("invalid hstride"); > + } > +} > + > static brw_inst * > brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, > struct brw_reg src0, struct brw_reg src1, struct brw_reg src2) > @@ -721,41 +757,18 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct > brw_reg dest, >brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type); >brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type); > > - assert((src0.vstride == BRW_VERTICAL_STRIDE_0 && > - src0.hstride == BRW_HORIZONTAL_STRIDE_0) || > - (src0.vstride == BRW_VERTICAL_STRIDE_8 && > - src0.hstride == BRW_HORIZONTAL_STRIDE_1)); > - assert((src1.vstride == BRW_VERTICAL_STRIDE_0 && > - src1.hstride == BRW_HORIZONTAL_STRIDE_0) || > - (src1.vstride == BRW_VERTICAL_STRIDE_8 && > - src1.hstride == BRW_HORIZONTAL_STRIDE_1)); > - assert((src2.vstride == BRW_VERTICAL_STRIDE_0 && > - src2.hstride == BRW_HORIZONTAL_STRIDE_0) || > - (src2.vstride == BRW_VERTICAL_STRIDE_8 && > - src2.hstride == BRW_HORIZONTAL_STRIDE_1)); > - Were 0,x,0 and 8,x,1 just a list of expected cases before or was it toward some restriction? I'm not seeing anything in the documentation that implies a restriction, so I'm guessing the former. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nv50/ir/ra: Fix copying compound for moves
On Mon, Jan 8, 2018 at 7:25 PM, Connor Abbottwrote: > In order to reduce moves when coalescing multiple registers into a > larger register, RA will try to coalesce MERGE instructions with their > definitions. For example, for something like this in GLSL: > > uint a = ...; > uint b = ...; > uint64 x = packUint2x32(a, b); > > The compiler will try to coalesce x with a and b, in the same way as > something like: > > uint a = ...; > uint b = ...; > ... > uint x = phi(a, b); > > with the crucial difference that the definitions of a and b only clobber > part of the register, instead of the whole thing. This information is > carried through the compound flag and compMask bitmask. If compound is > set, then the value has been coalesced in such a way that not all the > defs clobber the entire register. The compMask bitmask describes which > subregister each def clobbers, although it does it in a slightly > convoluted way. It's an invariant that once compound is set on one def, > it must be set for all the defs in a given coalesced value. A sorta related thing (ie, same problem, but implemented in a different way in ir3_ra.c:get_definer()), dealing w/ live ranges when different instructions write parts of a merged register, is I think the most annoying/painful thing about ir3_ra.. the simple case is simple, but it gets annoying quickly when you combine splits (fanout) / merges (fanin). I think I do a sufficient job of preventing too many mov's from being eliminated pre-ra to keep it all working, but seems super fragile. I'd be curious if you'd come across any papers (preferably not paywalled) on how to deal with that in a better way? 
BR, -R > In more detail, the constraints pass will first create extra moves: > > uint a = ...; > uint b = ...; > uint a' = a; > uint b' = b; > uint64 x = packUint2x32(a', b'); > > and then RA will merge values involved in MERGE/SPLIT instructions, > merging x with a' and b' and making the combined value compound -- this > is relatively simple, and will always succeed since we just created a' > and b', so they never interfere with x, and x has no other definitions, > since we haven't started coalescing moves yet. Basically, we just replaced > the MERGE instruction with an equivalent sequence of partial writes to the > destination. The tricky part comes when we try to merge a' with a > and b' with b. We need to transfer the compound information from a' to a > and b' to b, which copyCompound() does, but we also need to transfer it > to any defs coalesced with a and b, which the code failed to do. Similarly, > if x is the argument to a phi instruction, then when we try to merge it > with other arguments to the same phi by coalescing moves, we'd have > problems guaranteeing that all the other merged defs stay up-to-date. > > One tricky part of fixing this is that in order to properly propagate > the information from a' to a, we need to do it before the defs for a and > a' are merged in coalesceValues(), since we need to know which defs are > merged with a but not a' -- after coalesceValues() returns, all the defs > have been combined, so we don't know which is which. I took the approach > of calling copyCompound() inside coalesceValues(), instead of > afterwards. > > Cc: Ilia Mirkin > Cc: Karol Herbst > --- > So, I guess curiosity got the best of me :). Of course, I have no actual > way to test if this fixes the problem, but hopefully this at least helps > someone get further... 
> > src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 56 > ++ > 1 file changed, 36 insertions(+), 20 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > index b33d7b4010..2664c0678f 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > @@ -890,6 +890,34 @@ GCRA::RIG_Node::init(const RegisterSet& regs, LValue > *lval) > livei.insert(lval->livei); > } > > +// Used when coalescing moves. The non-compound value will become one, e.g.: > +// mov b32 $r0 $r2/ merge b64 $r0d { $r0 $r1 } > +// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d > +static inline void copyCompound(Value *dst, Value *src) > +{ > + LValue *ldst = dst->asLValue(); > + LValue *lsrc = src->asLValue(); > + > + if (ldst->compound && !lsrc->compound) { > + LValue *swap = lsrc; > + lsrc = ldst; > + ldst = swap; > + } > + > + assert(!ldst->compound); > + > + if (lsrc->compound) { > + Value *dstRep = ldst->join; > + for (Value::DefIterator d = dstRep->defs.begin(); d != > dstRep->defs.end(); > + ++d) { > + LValue *ldst = (*d)->get()->asLValue(); > + assert(!ldst->compound) > + ldst->compound = 1; > + ldst->compMask = lsrc->compMask; > + } > + } > +} > + > bool > GCRA::coalesceValues(Value *dst,
Re: [Mesa-dev] [PATCH 4/8] intel/blorp: Add support for fast clear address.
On Mon, Jan 08, 2018 at 04:06:53PM -0800, Jason Ekstrand wrote: > On Fri, Dec 15, 2017 at 2:53 PM, Rafael Antognolli >> wrote: > > On gen10+, if surface->clear_color_addr is present, use it directly > intead of copying it to the surface state. > > Signed-off-by: Rafael Antognolli > --- > src/intel/blorp/blorp_genX_exec.h | 12 +--- > 1 file changed, 9 insertions(+), 3 deletions(-) > > diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/ > blorp_genX_exec.h > index 1968460be05..a01d21c2cdd 100644 > --- a/src/intel/blorp/blorp_genX_exec.h > +++ b/src/intel/blorp/blorp_genX_exec.h > @@ -1276,11 +1276,15 @@ blorp_emit_surface_state(struct blorp_batch > *batch, > write_disable_mask |= ISL_CHANNEL_ALPHA_BIT; > } > > + const bool use_clear_address = > + GEN_GEN >= 10 && (surface->clear_color_addr.buffer != NULL); > + > isl_surf_fill_state(batch->blorp->isl_dev, state, > .surf = , .view = >view, > .aux_surf = >aux_surf, .aux_usage = > aux_usage, > .mocs = surface->addr.mocs, > .clear_color = surface->clear_color, > + .use_clear_address = use_clear_address, > .write_disables = write_disable_mask); > > blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset, > @@ -1300,9 +1304,11 @@ blorp_emit_surface_state(struct blorp_batch *batch, > blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * > 4); > > if (surface->clear_color_addr.buffer) { > -#if GEN_GEN > 10 > - unreachable("Implement indirect clear support on gen11+"); > -#elif GEN_GEN >= 7 && GEN_GEN <= 10 > +#if GEN_GEN >= 10 > + assert((surface->clear_color_addr.offset & 0x3f) == 0); > + blorp_surface_reloc(batch, state_offset + isl_dev->ss.clear_value_ > offset, > > > Are the value offset and the address offset the same? I think they are, but > just want to be sure. :-) Yes, they are. I thought about adding a new field like isl_dev->ss.clear_address_offset to avoid confusion, but they would end up being the same, so I just reused it. I can change this if you prefer. 
> + surface->clear_color_addr, 0); > +#elif GEN_GEN >= 7 && GEN_GEN < 10 >struct blorp_address dst_addr = blorp_get_surface_base_address > (batch); >dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset; >blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr, > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/8] anv: Make the clear state buffer 64 bytes aligned.
On Mon, Jan 08, 2018 at 04:03:47PM -0800, Jason Ekstrand wrote: > On Mon, Jan 8, 2018 at 3:00 PM, Nanley Cherywrote: > > On Fri, Dec 15, 2017 at 02:53:30PM -0800, Rafael Antognolli wrote: > > On Gen10+, if we use the clear state address field in the surface state > > instead of the clear color directly, there's a restriction that the > > address must point to the lower part of a 64 byte cache-line. > > > > Signed-off-by: Rafael Antognolli > > --- > > src/intel/vulkan/anv_private.h | 12 +++- > > 1 file changed, 11 insertions(+), 1 deletion(-) > > > > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_ > private.h > > index b7bde4b8ce6..43cbf065724 100644 > > --- a/src/intel/vulkan/anv_private.h > > +++ b/src/intel/vulkan/anv_private.h > > @@ -2490,7 +2490,17 @@ anv_fast_clear_state_entry_size(const struct > anv_device *device) > > * GPU memcpy operations. > > */ > > assert(device->isl_dev.ss.clear_value_size % 4 == 0); > > - return device->isl_dev.ss.clear_value_size + 4; > > + > > + const unsigned entry_size = device->isl_dev.ss.clear_value_size + 4; > > + /* On Gen10+, we use the clear color address of the surface to point > to this > > +* buffer directly. However, according to the bspec: > > +* > > +*The memory layout of the clear color pointed to by this > address > is a > > +*value stored in the lower-order bytes of a 64-byte cache-line. > > +* > > +* So add some padding here for Gen10+. > > +*/ > > I don't see any indication that the upper bytes may be modified by the > hardware. For that reason, I think we can assume that the image that > precedes this entry is at least 64 bytes and avoid padding the entry. > > > I'm not sure what you mean by this. Hmm... maybe my comment is confusing, but the idea is to add padding to the entry, so if we have multiple entries (one per level), all of them are aligned. I can try to find a better way to guarantee that. Without this code, I was hitting the assert in patch 04 on some tests. 
> > + return device->info.gen >= 10 ? ALIGN(entry_size, 64) : entry_size; > > > > } > > > > static inline struct anv_address > > -- > > 2.14.3 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/8] i965/miptree: Add space to store the clear value in the aux surface.
On Mon, Jan 08, 2018 at 03:14:54PM -0800, Nanley Chery wrote: > On Fri, Dec 15, 2017 at 02:53:32PM -0800, Rafael Antognolli wrote: > > Similarly to vulkan where we store the clear value in the aux surface, > > we can do the same in GL. > > > > Signed-off-by: Rafael Antognolli> > --- > > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 25 > > + > > 1 file changed, 25 insertions(+) > > > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > index ead0c359c0f..6400a2a616a 100644 > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > @@ -1663,6 +1663,21 @@ intel_miptree_init_mcs(struct brw_context *brw, > > brw_bo_unmap(mt->mcs_buf->bo); > > } > > > > +static unsigned > > +fast_clear_state_entry_size(const struct brw_context *brw) > > +{ > > + assert(brw); > > + > > + /* Entry contents: > > +* ++ > > +* | clear value dword(s) | > > +* ++ > > +*/ > > + assert(brw->isl_dev.ss.clear_value_size % 4 == 0); > > + > > + return brw->isl_dev.ss.clear_value_size; > > +} > > + > > Do you think more may fields may be added to this in the future? I'm > trying to understand why we have an additional function here. No, I just tried to make it similar to anv, but anv at least has a reason to have such function. I'll remove it in the next iteration. > > static struct intel_miptree_aux_buffer * > > intel_alloc_aux_buffer(struct brw_context *brw, > > const char *name, > > @@ -1675,6 +1690,16 @@ intel_alloc_aux_buffer(struct brw_context *brw, > >return false; > > > > buf->size = aux_surf->size; > > + > > + const struct gen_device_info *devinfo = >screen->devinfo; > > + if (devinfo->gen >= 10) { > > + /* On CNL, instead of setting the clear color in the SURFACE_STATE, > > we > > + * will set a pointer to a dword somewhere that contains the color. > > So, > > + * allocate the space for the clear color value here on the aux > > buffer. 
> > + */ > > + buf->size += fast_clear_state_entry_size(brw); > > I like the plan of allocating more space here. > > > + } > > + > > buf->pitch = aux_surf->row_pitch; > > buf->qpitch = isl_surf_get_array_pitch_sa_rows(aux_surf); > > > > -- > > 2.14.3 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nv50/ir/ra: Fix copying compound for moves
In order to reduce moves when coalescing multiple registers into a larger register, RA will try to coalesce MERGE instructions with their definitions. For example, for something like this in GLSL: uint a = ...; uint b = ...; uint64 x = packUint2x32(a, b); The compiler will try to coalesce x with a and b, in the same way as something like: uint a = ...; uint b = ...; ... uint x = phi(a, b); with the crucial difference that the definitions of a and b only clobber part of the register, instead of the whole thing. This information is carried through the compound flag and compMask bitmask. If compound is set, then the value has been coalesced in such a way that not all the defs clobber the entire register. The compMask bitmask describes which subregister each def clobbers, although it does it in a slightly convoluted way. It's an invariant that once compound is set on one def, it must be set for all the defs in a given coalesced value. In more detail, the constraints pass will first create extra moves: uint a = ...; uint b = ...; uint a' = a; uint b' = b; uint64 x = packUint2x32(a', b'); and then RA will merge values involved in MERGE/SPLIT instructions, merging x with a' and b' and making the combined value compound -- this is relatively simple, and will always succeed since we just created a' and b', so they never interfere with x, and x has no other definitions, since we haven't started coalescing moves yet. Basically, we just replaced the MERGE instruction with an equivalent sequence of partial writes to the destination. The tricky part comes when we try to merge a' with a and b' with b. We need to transfer the compound information from a' to a and b' to b, which copyCompound() does, but we also need to transfer it to any defs coalesced with a and b, which the code failed to do. 
Similarly, if x is the argument to a phi instruction, then when we try to merge it with other arguments to the same phi by coalescing moves, we'd have problems guaranteeing that all the other merged defs stay up-to-date. One tricky part of fixing this is that in order to properly propagate the information from a' to a, we need to do it before the defs for a and a' are merged in coalesceValues(), since we need to know which defs are merged with a but not a' -- after coalesceValues() returns, all the defs have been combined, so we don't know which is which. I took the approach of calling copyCompound() inside coalesceValues(), instead of afterwards. Cc: Ilia MirkinCc: Karol Herbst --- So, I guess curiosity got the best of me :). Of course, I have no actual way to test if this fixes the problem, but hopefully this at least helps someone get further... src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 56 ++ 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index b33d7b4010..2664c0678f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -890,6 +890,34 @@ GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval) livei.insert(lval->livei); } +// Used when coalescing moves. 
The non-compound value will become one, e.g.: +// mov b32 $r0 $r2/ merge b64 $r0d { $r0 $r1 } +// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d +static inline void copyCompound(Value *dst, Value *src) +{ + LValue *ldst = dst->asLValue(); + LValue *lsrc = src->asLValue(); + + if (ldst->compound && !lsrc->compound) { + LValue *swap = lsrc; + lsrc = ldst; + ldst = swap; + } + + assert(!ldst->compound); + + if (lsrc->compound) { + Value *dstRep = ldst->join; + for (Value::DefIterator d = dstRep->defs.begin(); d != dstRep->defs.end(); + ++d) { + LValue *ldst = (*d)->get()->asLValue(); + assert(!ldst->compound) + ldst->compound = 1; + ldst->compMask = lsrc->compMask; + } + } +} + bool GCRA::coalesceValues(Value *dst, Value *src, bool force) { @@ -932,9 +960,16 @@ GCRA::coalesceValues(Value *dst, Value *src, bool force) if (!force && nRep->livei.overlaps(nVal->livei)) return false; + // TODO: Handle this case properly. + if (!force && rep->compound && val->compound) + return false; + INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n", rep->id, rep->reg.data.id, val->id); + if (!force) + copyCompound(dst, src); + // set join pointer of all values joined with val for (Value::DefIterator def = val->defs.begin(); def != val->defs.end(); ++def) @@ -997,24 +1032,6 @@ static inline uint8_t makeCompMask(int compSize, int base, int size) } } -// Used when coalescing moves. The non-compound value will become one, e.g.: -// mov b32 $r0 $r2/ merge b64 $r0d { $r0 $r1 } -// split b64 { $r0 $r1 } $r0d / mov b64
Re: [Mesa-dev] [PATCH 4/8] intel/blorp: Add support for fast clear address.
On Fri, Dec 15, 2017 at 2:53 PM, Rafael Antognolli < rafael.antogno...@intel.com> wrote: > On gen10+, if surface->clear_color_addr is present, use it directly > intead of copying it to the surface state. > > Signed-off-by: Rafael Antognolli> --- > src/intel/blorp/blorp_genX_exec.h | 12 +--- > 1 file changed, 9 insertions(+), 3 deletions(-) > > diff --git a/src/intel/blorp/blorp_genX_exec.h > b/src/intel/blorp/blorp_genX_exec.h > index 1968460be05..a01d21c2cdd 100644 > --- a/src/intel/blorp/blorp_genX_exec.h > +++ b/src/intel/blorp/blorp_genX_exec.h > @@ -1276,11 +1276,15 @@ blorp_emit_surface_state(struct blorp_batch > *batch, > write_disable_mask |= ISL_CHANNEL_ALPHA_BIT; > } > > + const bool use_clear_address = > + GEN_GEN >= 10 && (surface->clear_color_addr.buffer != NULL); > + > isl_surf_fill_state(batch->blorp->isl_dev, state, > .surf = , .view = >view, > .aux_surf = >aux_surf, .aux_usage = > aux_usage, > .mocs = surface->addr.mocs, > .clear_color = surface->clear_color, > + .use_clear_address = use_clear_address, > .write_disables = write_disable_mask); > > blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset, > @@ -1300,9 +1304,11 @@ blorp_emit_surface_state(struct blorp_batch *batch, > blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * > 4); > > if (surface->clear_color_addr.buffer) { > -#if GEN_GEN > 10 > - unreachable("Implement indirect clear support on gen11+"); > -#elif GEN_GEN >= 7 && GEN_GEN <= 10 > +#if GEN_GEN >= 10 > + assert((surface->clear_color_addr.offset & 0x3f) == 0); > + blorp_surface_reloc(batch, state_offset + isl_dev->ss.clear_value_ > offset, > Are the value offset and the address offset the same? I think they are, but just want to be sure. 
:-) > + surface->clear_color_addr, 0); > +#elif GEN_GEN >= 7 && GEN_GEN < 10 >struct blorp_address dst_addr = blorp_get_surface_base_ > address(batch); >dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset; >blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr, > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/8] anv: Make the clear state buffer 64 bytes aligned.
On Mon, Jan 8, 2018 at 3:00 PM, Nanley Cherywrote: > On Fri, Dec 15, 2017 at 02:53:30PM -0800, Rafael Antognolli wrote: > > On Gen10+, if we use the clear state address field in the surface state > > instead of the clear color directly, there's a restriction that the > > address must point to the lower part of a 64 byte cache-line. > > > > Signed-off-by: Rafael Antognolli > > --- > > src/intel/vulkan/anv_private.h | 12 +++- > > 1 file changed, 11 insertions(+), 1 deletion(-) > > > > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_ > private.h > > index b7bde4b8ce6..43cbf065724 100644 > > --- a/src/intel/vulkan/anv_private.h > > +++ b/src/intel/vulkan/anv_private.h > > @@ -2490,7 +2490,17 @@ anv_fast_clear_state_entry_size(const struct > anv_device *device) > > * GPU memcpy operations. > > */ > > assert(device->isl_dev.ss.clear_value_size % 4 == 0); > > - return device->isl_dev.ss.clear_value_size + 4; > > + > > + const unsigned entry_size = device->isl_dev.ss.clear_value_size + 4; > > + /* On Gen10+, we use the clear color address of the surface to point > to this > > +* buffer directly. However, according to the bspec: > > +* > > +*The memory layout of the clear color pointed to by this > address is a > > +*value stored in the lower-order bytes of a 64-byte cache-line. > > +* > > +* So add some padding here for Gen10+. > > +*/ > > I don't see any indication that the upper bytes may be modified by the > hardware. For that reason, I think we can assume that the image that > precedes this entry is at least 64 bytes and avoid padding the entry. I'm not sure what you mean by this. > > + return device->info.gen >= 10 ? 
ALIGN(entry_size, 64) : entry_size; > > > > } > > > > static inline struct anv_address > > -- > > 2.14.3 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/8] intel/isl: Add support to emit clear value address.
On Mon, Jan 8, 2018 at 2:29 PM, Nanley Cherywrote: > On Fri, Dec 15, 2017 at 02:53:29PM -0800, Rafael Antognolli wrote: > > gen10 can emit the clear color by setting it on a buffer somewhere, and > > then adding only the address to the surface state. > > > > This commit add support for that on isl_surf_fill_state, and if that is > > requested, skip setting the clear value itself. > > > > Signed-off-by: Rafael Antognolli > > --- > > src/intel/isl/isl.h | 9 + > > src/intel/isl/isl_surface_state.c | 15 +++ > > 2 files changed, 20 insertions(+), 4 deletions(-) > > > > diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h > > index e3acb0ec280..c6e1fee27c1 100644 > > --- a/src/intel/isl/isl.h > > +++ b/src/intel/isl/isl.h > > @@ -1277,6 +1277,15 @@ struct isl_surf_fill_state_info { > > */ > > union isl_color_value clear_color; > > > > + /** > > +* Send only the clear value address > > +* > > +* If set, we only pass the clear address to the GPU and it will > fetch it > > +* from wherever it is. 
> > +*/ > > + bool use_clear_address; > > + uint64_t clear_address; > > + > > /** > > * Surface write disables for gen4-5 > > */ > > diff --git a/src/intel/isl/isl_surface_state.c > b/src/intel/isl/isl_surface_state.c > > index bfb27fa4a44..14741459687 100644 > > --- a/src/intel/isl/isl_surface_state.c > > +++ b/src/intel/isl/isl_surface_state.c > > @@ -635,11 +635,18 @@ isl_genX(surf_fill_state_s)(const struct > isl_device *dev, void *state, > > #endif > > > > if (info->aux_usage != ISL_AUX_USAGE_NONE) { > > +#if GEN_GEN >= 10 > > + s.ClearValueAddressEnable = info->use_clear_address; > > + s.ClearValueAddressHigh = info->clear_address >> 32; > > + s.ClearValueAddressLow = info->clear_address; > > +#endif > > #if GEN_GEN >= 9 > > - s.RedClearColor = info->clear_color.u32[0]; > > - s.GreenClearColor = info->clear_color.u32[1]; > > - s.BlueClearColor = info->clear_color.u32[2]; > > - s.AlphaClearColor = info->clear_color.u32[3]; > > + if (!info->use_clear_address) { > > + s.RedClearColor = info->clear_color.u32[0]; > > + s.GreenClearColor = info->clear_color.u32[1]; > > + s.BlueClearColor = info->clear_color.u32[2]; > > + s.AlphaClearColor = info->clear_color.u32[3]; > > + } > > It'd be nice to assert that use_clear_address is false for gen9. > Yes it would. How about something like this: if (info->use_clear_address) { #if GEN_GEN >= 10 s.ClearValueAddressEnable = true; s.ClearValueAddress = info->clear_address; #else unreachable("Gen9 and earlier do not support indirect clear colors"); #endif } else { // Set clear colors } > -Nanley > > > #elif GEN_GEN >= 7 > >/* Prior to Sky Lake, we only have one bit for the clear color > which > > * gives us 0 or 1 in whatever the surface's format happens to be. 
> > -- > > 2.14.3 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/8] intel/genxml: Use a single field for clear color address on gen10.
On Fri, Dec 15, 2017 at 2:53 PM, Rafael Antognolli < rafael.antogno...@intel.com> wrote: > genxml does not support having two address fields with different names > but same position in the state struct. Both "Clear Color Address" > and "Clear Depth Address Low" mean the same thing, only for different > surface types. > > To workaround this genxml limitation, rename "Clear Color Address" > to "Clear Value Address Low" and use it for both color and depth. Do the > same for the high bits. > > TODO: add support for multiple addresses at the same position in the > xml. > > Signed-off-by: Rafael Antognolli> --- > src/intel/genxml/gen10.xml | 10 ++ > 1 file changed, 6 insertions(+), 4 deletions(-) > > diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml > index a6b8f48fda5..aeaa77871c8 100644 > --- a/src/intel/genxml/gen10.xml > +++ b/src/intel/genxml/gen10.xml > @@ -801,11 +801,13 @@ > > > > - type="address"/> > - type="address"/> > + > + type="address"/> > > - type="address"/> > - type="address"/> > + > + type="address"/> > These two fields are consecutive. We need to combine them into a single 64-bit address field so that 48-bit addresses with relocations will work correctly. > > > > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/8] intel/genxml: Use a single field for clear color address on gen10.
On Fri, Dec 15, 2017 at 02:53:28PM -0800, Rafael Antognolli wrote: > genxml does not support having two address fields with different names > but same position in the state struct. Both "Clear Color Address" > and "Clear Depth Address Low" mean the same thing, only for different > surface types. > > To workaround this genxml limitation, rename "Clear Color Address" > to "Clear Value Address Low" and use it for both color and depth. Do the > same for the high bits. > > TODO: add support for multiple addresses at the same position in the > xml. > > Signed-off-by: Rafael Antognolli> --- > src/intel/genxml/gen10.xml | 10 ++ > 1 file changed, 6 insertions(+), 4 deletions(-) > This patch is Reviewed-by: Nanley Chery > diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml > index a6b8f48fda5..aeaa77871c8 100644 > --- a/src/intel/genxml/gen10.xml > +++ b/src/intel/genxml/gen10.xml > @@ -801,11 +801,13 @@ > > > > - > - type="address"/> > + > + type="address"/> > > - type="address"/> > - type="address"/> > + > + type="address"/> > > > > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 8/8] i965/surface_state: Silence warning.
On Fri, Dec 15, 2017 at 02:53:35PM -0800, Rafael Antognolli wrote: > This warning showed up after aux_bo started being used inside > > if (use_clear_address) {... > > But use_clear_address depends on aux_surf being not null, in which case > aux_bo would also be set. Make the compiler happy anyway. > > Signed-off-by: Rafael Antognolli> --- > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > This patch is Reviewed-by: Nanley Chery > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > index 9f583ca995f..78968aa83d0 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > @@ -148,7 +148,7 @@ brw_emit_surface_state(struct brw_context *brw, > > union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } }; > > - struct brw_bo *aux_bo; > + struct brw_bo *aux_bo = NULL; > struct isl_surf *aux_surf = NULL; > uint64_t aux_offset = 0; > switch (aux_usage) { > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/8] i965/miptree: Add space to store the clear value in the aux surface.
On Fri, Dec 15, 2017 at 02:53:32PM -0800, Rafael Antognolli wrote: > Similarly to vulkan where we store the clear value in the aux surface, > we can do the same in GL. > > Signed-off-by: Rafael Antognolli> --- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 25 + > 1 file changed, 25 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index ead0c359c0f..6400a2a616a 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -1663,6 +1663,21 @@ intel_miptree_init_mcs(struct brw_context *brw, > brw_bo_unmap(mt->mcs_buf->bo); > } > > +static unsigned > +fast_clear_state_entry_size(const struct brw_context *brw) > +{ > + assert(brw); > + > + /* Entry contents: > +* ++ > +* | clear value dword(s) | > +* ++ > +*/ > + assert(brw->isl_dev.ss.clear_value_size % 4 == 0); > + > + return brw->isl_dev.ss.clear_value_size; > +} > + Do you think more fields may be added to this in the future? I'm trying to understand why we have an additional function here. > static struct intel_miptree_aux_buffer * > intel_alloc_aux_buffer(struct brw_context *brw, > const char *name, > @@ -1675,6 +1690,16 @@ intel_alloc_aux_buffer(struct brw_context *brw, >return false; > > buf->size = aux_surf->size; > + > + const struct gen_device_info *devinfo = >screen->devinfo; > + if (devinfo->gen >= 10) { > + /* On CNL, instead of setting the clear color in the SURFACE_STATE, we > + * will set a pointer to a dword somewhere that contains the color. So, > + * allocate the space for the clear color value here on the aux buffer. > + */ > + buf->size += fast_clear_state_entry_size(brw); I like the plan of allocating more space here. 
> + } > + > buf->pitch = aux_surf->row_pitch; > buf->qpitch = isl_surf_get_array_pitch_sa_rows(aux_surf); > > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa/st: translate SO info in glsl_to_nir() case
Reviewed-by: Timothy ArceriOn 09/01/18 02:41, Karol Herbst wrote: From: Rob Clark This was handled for VS, but not for GS. Fixes for gallium drivers using nir: spec@arb_gpu_shader5@arb_gpu_shader5-xfb-streams-without-invocations spec@arb_gpu_shader5@arb_gpu_shader5-xfb-streams* spec@arb_transform_feedback3@arb_transform_feedback3-ext_interleaved_two_bufs_gs* spec@ext_transform_feedback@geometry-shaders-basic spec@ext_transform_feedback@* use_gs spec@glsl-1.50@execution@geometry@primitive-id* spec@glsl-1.50@execution@geometry@tri-strip-ordering-with-prim-restart gl_triangle_strip * spec@glsl-1.50@transform-feedback-builtins spec@glsl-1.50@transform-feedback-type-and-size Signed-off-by: Rob Clark Tested-by: Karol Herbst Cc: Rob Clark Cc: Timothy Arceri --- src/mesa/state_tracker/st_program.c | 61 ++--- 1 file changed, 56 insertions(+), 5 deletions(-) diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 05e6042f42..5efd83d5a8 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -1421,6 +1421,50 @@ st_translate_program_common(struct st_context *st, } } +/** + * Update stream-output info for GS/TCS/TES. Normally this is done in + * st_translate_program_common() but that is not called for glsl_to_nir + * case. + */ +static void +st_translate_program_stream_output(struct gl_program *prog, + struct pipe_stream_output_info *stream_output) +{ + if (!prog->sh.LinkedTransformFeedback) + return; + + ubyte outputMapping[VARYING_SLOT_TESS_MAX]; + GLuint attr; + uint num_outputs = 0; + + memset(outputMapping, 0, sizeof(outputMapping)); + + /* +* Determine number of outputs, the (default) output register +* mapping and the semantic information for each output. +*/ + for (attr = 0; attr < VARYING_SLOT_MAX; attr++) { + if (prog->info.outputs_written & BITFIELD64_BIT(attr)) { + GLuint slot = num_outputs++; + + outputMapping[attr] = slot; + } + } + + /* Also add patch outputs. 
*/ + for (attr = 0; attr < 32; attr++) { + if (prog->info.patch_outputs_written & (1u << attr)) { + GLuint slot = num_outputs++; + GLuint patch_attr = VARYING_SLOT_PATCH0 + attr; + + outputMapping[patch_attr] = slot; + } + } + + st_translate_stream_output_info2(prog->sh.LinkedTransformFeedback, +outputMapping, +stream_output); +} /** * Translate a geometry program to create a new variant. @@ -1432,8 +1476,10 @@ st_translate_geometry_program(struct st_context *st, struct ureg_program *ureg; /* We have already compiled to NIR so just return */ - if (stgp->shader_program) + if (stgp->shader_program) { + st_translate_program_stream_output(>Base, >tgsi.stream_output); return true; + } ureg = ureg_create_with_screen(PIPE_SHADER_GEOMETRY, st->pipe->screen); if (ureg == NULL) @@ -1489,6 +1535,7 @@ st_get_basic_variant(struct st_context *st, tgsi.ir.nir = nir_shader_clone(NULL, prog->tgsi.ir.nir); st_finalize_nir(st, >Base, prog->shader_program, tgsi.ir.nir); +tgsi.stream_output = prog->tgsi.stream_output; } else tgsi = prog->tgsi; /* fill in new variant */ @@ -1529,9 +1576,11 @@ st_translate_tessctrl_program(struct st_context *st, { struct ureg_program *ureg; - /* We have already compiler to NIR so just return */ - if (sttcp->shader_program) + /* We have already compiled to NIR so just return */ + if (sttcp->shader_program) { + st_translate_program_stream_output(>Base, >tgsi.stream_output); return true; + } ureg = ureg_create_with_screen(PIPE_SHADER_TESS_CTRL, st->pipe->screen); if (ureg == NULL) @@ -1558,9 +1607,11 @@ st_translate_tesseval_program(struct st_context *st, { struct ureg_program *ureg; - /* We have already compiler to NIR so just return */ - if (sttep->shader_program) + /* We have already compiled to NIR so just return */ + if (sttep->shader_program) { + st_translate_program_stream_output(>Base, >tgsi.stream_output); return true; + } ureg = ureg_create_with_screen(PIPE_SHADER_TESS_EVAL, st->pipe->screen); if (ureg == NULL) ___ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/8] anv: Make the clear state buffer 64 bytes aligned.
On Fri, Dec 15, 2017 at 02:53:30PM -0800, Rafael Antognolli wrote: > On Gen10+, if we use the clear state address field in the surface state > instead of the clear color directly, there's a restriction that the > address must point to the lower part of a 64 byte cache-line. > > Signed-off-by: Rafael Antognolli> --- > src/intel/vulkan/anv_private.h | 12 +++- > 1 file changed, 11 insertions(+), 1 deletion(-) > > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h > index b7bde4b8ce6..43cbf065724 100644 > --- a/src/intel/vulkan/anv_private.h > +++ b/src/intel/vulkan/anv_private.h > @@ -2490,7 +2490,17 @@ anv_fast_clear_state_entry_size(const struct > anv_device *device) > * GPU memcpy operations. > */ > assert(device->isl_dev.ss.clear_value_size % 4 == 0); > - return device->isl_dev.ss.clear_value_size + 4; > + > + const unsigned entry_size = device->isl_dev.ss.clear_value_size + 4; > + /* On Gen10+, we use the clear color address of the surface to point to > this > +* buffer directly. However, according to the bspec: > +* > +*The memory layout of the clear color pointed to by this address is a > +*value stored in the lower-order bytes of a 64-byte cache-line. > +* > +* So add some padding here for Gen10+. > +*/ I don't see any indication that the upper bytes may be modified by the hardware. For that reason, I think we can assume that the image that precedes this entry is at least 64 bytes and avoid padding the entry. > + return device->info.gen >= 10 ? ALIGN(entry_size, 64) : entry_size; > } > > static inline struct anv_address > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] intel: Apply Geminilake "Barrier Mode" workaround.
On Thu, Jan 04, 2018 at 11:36:48AM -0800, Kenneth Graunke wrote: > Apparently, Geminilake requires you to whack a chicken bit to select > either compute or tessellation mode for barriers. The recommendation > is to switch between them at PIPELINE_SELECT time. > > We may not need to do this all the time, but I don't know that it hurts > either. PIPELINE_SELECT is already a pretty giant stall. > > This appears to fix hangs in tessellation control shaders with barriers > on Geminilake. Note that this requires a corresponding kernel change, > > drm/i915: Whitelist SLICE_COMMON_ECO_CHICKEN1 on Geminilake. > > in order for the register write to actually happen. Without an updated > kernel, this register write will be noop'd and the fix will not work. > --- > src/intel/genxml/gen9.xml | 8 > src/intel/vulkan/genX_cmd_buffer.c | 21 + > src/mesa/drivers/dri/i965/brw_defines.h| 5 + > src/mesa/drivers/dri/i965/brw_misc_state.c | 15 +++ > 4 files changed, 49 insertions(+) > > diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml > index 1422463693d..019d264fb70 100644 > --- a/src/intel/genxml/gen9.xml > +++ b/src/intel/genxml/gen9.xml > @@ -3710,6 +3710,14 @@ > type="bool"/> > > > + > + Kind of nitpicking, but this field means more than a simple enable/disable kind of boolean. In other similar places we used "uint" instead of "bool" to represent that, especially since you are assigning value names to it. For instance, Floating Point Mode is like that, but there are other examples. Maybe we should decide one way or the other and make it more consistent. 
Regardless of that, this patch is Reviewed-by: Rafael Antognolli> + > + > + > + > + > + > > > > diff --git a/src/intel/vulkan/genX_cmd_buffer.c > b/src/intel/vulkan/genX_cmd_buffer.c > index 0bd3874db73..25fcce9d01a 100644 > --- a/src/intel/vulkan/genX_cmd_buffer.c > +++ b/src/intel/vulkan/genX_cmd_buffer.c > @@ -2714,6 +2714,8 @@ static void > genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer, > uint32_t pipeline) > { > + UNUSED const struct gen_device_info *devinfo = _buffer->device->info; > + > if (cmd_buffer->state.current_pipeline == pipeline) >return; > > @@ -2764,6 +2766,25 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer > *cmd_buffer, >ps.PipelineSelection = pipeline; > } > > +#if GEN_GEN == 9 > + if (devinfo->is_geminilake) { > + /* Project: DevGLK > + * > + * "This chicken bit works around a hardware issue with barrier logic > + * encountered when switching between GPGPU and 3D pipelines. To > + * workaround the issue, this mode bit should be set after a pipeline > + * is selected." > + */ > + uint32_t scec; > + anv_pack_struct(, GENX(SLICE_COMMON_ECO_CHICKEN1), > + .GLKBarrierMode = > + pipeline == GPGPU ? 
GLK_BARRIER_MODE_GPGPU > +: GLK_BARRIER_MODE_3D_HULL, > + .GLKBarrierModeMask = 1); > + emit_lri(_buffer->batch, GENX(SLICE_COMMON_ECO_CHICKEN1_num), > scec); > + } > +#endif > + > cmd_buffer->state.current_pipeline = pipeline; > } > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index 99d41cf1a56..8bf6f68b67c 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -1656,4 +1656,9 @@ enum brw_pixel_shader_coverage_mask_mode { > #define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */ > # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4) > > +#define SLICE_COMMON_ECO_CHICKEN1 0x731c /* Gen9+ */ > +# define GLK_SCEC_BARRIER_MODE_GPGPU (0 << 7) > +# define GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7) > +# define GLK_SCEC_BARRIER_MODE_MASKREG_MASK(1 << 7) > + > #endif > diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c > b/src/mesa/drivers/dri/i965/brw_misc_state.c > index a1ac0abe285..c4ef6812bff 100644 > --- a/src/mesa/drivers/dri/i965/brw_misc_state.c > +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c > @@ -516,6 +516,21 @@ brw_emit_select_pipeline(struct brw_context *brw, enum > brw_pipeline pipeline) >OUT_BATCH(0); >ADVANCE_BATCH(); > } > + > + if (devinfo->is_geminilake) { > + /* Project: DevGLK > + * > + * "This chicken bit works around a hardware issue with barrier logic > + * encountered when switching between GPGPU and 3D pipelines. To > + * workaround the issue, this mode bit should be set after a pipeline > + * is selected." > + */ > + const unsigned barrier_mode = > + pipeline == BRW_RENDER_PIPELINE ? GLK_SCEC_BARRIER_MODE_3D_HULL > +
Re: [Mesa-dev] Mesa 17.3.2 release candidate
On Monday, 2018-01-08 21:58:16 +, Emil Velikov wrote: > On 8 January 2018 at 12:00, Eric Engestromwrote: > > On Thursday, 2018-01-04 14:10:52 +, Emil Velikov wrote: > >> Hello list, > >> > >> The release candidate for Mesa 17.3.2 is now available. Currently we have: > >> - 13 queued > >> - 18 nominated (outstanding) > >> - and 0 rejected patches > >> > >> > >> In the current queue we have: > >> > >> Multiple fixes in the RADV Vulkan driver, workaround when using slibtool > >> and > >> a GLSL workaround for various titles using Unreal Engine 4. > >> > >> Take a look at section "Mesa stable queue" for more information. > >> > >> > >> Testing reports/general approval > >> > >> > >> Any testing reports (or general approval of the state of the branch) > >> will be greatly appreciated. > >> > >> The plan is to have 17.3.2 this Saturday (6th of January), around or > >> shortly after 15:00 GMT. > >> > >> If you have any questions or suggestions - be that about the current > >> patch queue or otherwise, please go ahead. > >> > >> > >> Cheers, > >> Emil > >> > >> > >> Mesa stable queue > >> - > >> > >> Nominated (18) > >> == > >> > >> Bas Nieuwenhuizen (8): > >> c39947ce304 radv: Invert condition for all samples identical > >> during resolve. > >> cebc9a119d4 radv: Flush caches before subpass resolve. > >> 73279da41da radv: Fix fragment resolve destination offset. > >> da192b50b24 radv: Use correct framebuffer size for partial FS > >> resolves. > >> a636208ace0 radv: Always use fragment resolve if dest uses DCC. > >> 5158603182f radv: Use correct HTILE expanded words. > >> 1c78e4f0535 radv: Allow writing 0 scissors. > >> c99426ea831 ac/nir: Handle loading data from compact arrays. > >> > >> Dave Airlie (8): > >> 38e4467e99a radv: fix events on compute queues. > >> ec1edd0fd29 radv: fix pipeline statistics end query on compute queue > >> d08f2678145 radv/gfx9: fix 3d image to image transfers on compute > >> queues. 
> >> 09612a62e16 radv/gfx9: fix 3d image clears on compute queues > >> 420627e6e79 radv/gfx9: fix buffer to image for 3d images on compute > >> queues > >> 59515780433 radv/gfx9: fix block compression texture views. > >> 868377ab33e radv/gfx9: use a bigger hammer to flush cb/db caches. > >> a4c23ce1b68 radv/gfx9: use correct swizzle parameter to work out > >> border swizzle. > >> > >> Jason Ekstrand (1): > >> 18fde36ced4 intel/fs: Use the original destination region for > >> int MUL lowering > >> > >> Lucas Stach (1): > >> 01585659245 etnaviv: disable in-place resolve for non-supertiled > >> surfaces > >> > >> > >> Queued (13) > >> === > >> > >> Bas Nieuwenhuizen (1): > >> radv: Fix DCC compatible formats. > >> > >> Brendan King (1): > >> egl: link libEGL against the dynamic version of libglapi > >> > >> Dave Airlie (6): > >> radv/gfx9: add support for 3d images to blit 2d paths > >> radv: handle depth/stencil image copy with layouts better. (v3.1) > >> radv/meta: fix blit paths for depth/stencil (v2.1) > >> radv: fix issue with multisample positions and interp_var_at_sample. > >> radv/gfx9: add 3d sampler image->buffer copy shader. (v3) > >> radv: don't do format replacement on tc compat htile surfaces. > >> > >> Eric Engestrom (1): > >> egl: let each platform decided how to handle LIBGL_ALWAYS_SOFTWARE > > > > I broke Android in this commit, please include RobHer's > > aa187fe7bfac856207a5 "egl/android: Fix build break with > > dri2_initialize_android _EGLDisplay parameter" > > > Thanks Eric - I've pulled the fixup. > > Sadly ssh to people.fd.o fails, so releasing will be shortly delayed :-\ It fails because it's been disabled, in response to the recent CPU vulnerabilities [1]. That said, I'm sure you can ask DanielS or another admin to push the tarball for you ;) [1] https://lists.freedesktop.org/archives/freedesktop/2018-January/000354.html ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/8] intel/isl: Add support to emit clear value address.
On Fri, Dec 15, 2017 at 02:53:29PM -0800, Rafael Antognolli wrote: > gen10 can emit the clear color by setting it on a buffer somewhere, and > then adding only the address to the surface state. > > This commit add support for that on isl_surf_fill_state, and if that is > requested, skip setting the clear value itself. > > Signed-off-by: Rafael Antognolli> --- > src/intel/isl/isl.h | 9 + > src/intel/isl/isl_surface_state.c | 15 +++ > 2 files changed, 20 insertions(+), 4 deletions(-) > > diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h > index e3acb0ec280..c6e1fee27c1 100644 > --- a/src/intel/isl/isl.h > +++ b/src/intel/isl/isl.h > @@ -1277,6 +1277,15 @@ struct isl_surf_fill_state_info { > */ > union isl_color_value clear_color; > > + /** > +* Send only the clear value address > +* > +* If set, we only pass the clear address to the GPU and it will fetch it > +* from wherever it is. > +*/ > + bool use_clear_address; > + uint64_t clear_address; > + > /** > * Surface write disables for gen4-5 > */ > diff --git a/src/intel/isl/isl_surface_state.c > b/src/intel/isl/isl_surface_state.c > index bfb27fa4a44..14741459687 100644 > --- a/src/intel/isl/isl_surface_state.c > +++ b/src/intel/isl/isl_surface_state.c > @@ -635,11 +635,18 @@ isl_genX(surf_fill_state_s)(const struct isl_device > *dev, void *state, > #endif > > if (info->aux_usage != ISL_AUX_USAGE_NONE) { > +#if GEN_GEN >= 10 > + s.ClearValueAddressEnable = info->use_clear_address; > + s.ClearValueAddressHigh = info->clear_address >> 32; > + s.ClearValueAddressLow = info->clear_address; > +#endif > #if GEN_GEN >= 9 > - s.RedClearColor = info->clear_color.u32[0]; > - s.GreenClearColor = info->clear_color.u32[1]; > - s.BlueClearColor = info->clear_color.u32[2]; > - s.AlphaClearColor = info->clear_color.u32[3]; > + if (!info->use_clear_address) { > + s.RedClearColor = info->clear_color.u32[0]; > + s.GreenClearColor = info->clear_color.u32[1]; > + s.BlueClearColor = info->clear_color.u32[2]; > + s.AlphaClearColor = 
info->clear_color.u32[3]; > + } It'd be nice to assert that use_clear_address is false for gen9. -Nanley > #elif GEN_GEN >= 7 >/* Prior to Sky Lake, we only have one bit for the clear color which > * gives us 0 or 1 in whatever the surface's format happens to be. > -- > 2.14.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 09/11] spirv: Add better type validation to OpTypeImage
On Mon, Jan 8, 2018 at 10:33 AM, Lionel Landwerlin < lionel.g.landwer...@intel.com> wrote: > On 17/12/17 05:46, Jason Ekstrand wrote: > >> --- >> src/compiler/spirv/spirv_to_nir.c | 17 ++--- >> 1 file changed, 10 insertions(+), 7 deletions(-) >> >> diff --git a/src/compiler/spirv/spirv_to_nir.c >> b/src/compiler/spirv/spirv_to_nir.c >> index 83c75c7..ffea442 100644 >> --- a/src/compiler/spirv/spirv_to_nir.c >> +++ b/src/compiler/spirv/spirv_to_nir.c >> @@ -1075,10 +1075,12 @@ vtn_handle_type(struct vtn_builder *b, SpvOp >> opcode, >> case SpvOpTypeImage: { >> val->type->base_type = vtn_base_type_image; >> - const struct glsl_type *sampled_type = >> - vtn_value(b, w[2], vtn_value_type_type)->type->type; >> + const struct vtn_type *sampled_type = >> + vtn_value(b, w[2], vtn_value_type_type)->type; >> - vtn_assert(glsl_type_is_vector_or_scalar(sampled_type)); >> + vtn_fail_if(sampled_type->base_type != vtn_base_type_scalar || >> + glsl_get_bit_size(sampled_type->type) != 32, >> + "Sampled type of OpTypeImage must be a 32-bit scalar"); >> > > Maybe I'm missing something, but the 1.3 spec says : > > "Sampled Type is the type of the components that result from sampling or > reading from this image type. Must be a > scalar numerical type or OpTypeVoid." > > "Numerical type: An integer type or a floating-point type." > > "Floating-point type: Any width type from OpTypeFloat." > > And OpTypeFloat can have variable width (like 16bits), so I'm not too sure > about != 32 for the fail_if() above. > Ugh... I don't know what to do with this. GLSL doesn't support anything other than 32-bit types so glslang won't produce anything else. Also, nothing in NIR or GLSL IR is prepared to handle anything other than 32-bit image types. We could bend over backwards to allow it here but there would be no testing. My opinion after a few minutes thought is to leave it as-is. I've filed a Vulkan spec bug. 
--Jason > enum glsl_sampler_dim dim; >> switch ((SpvDim)w[3]) { >> @@ -1090,7 +1092,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, >> case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; >> case SpvDimSubpassData: dim = GLSL_SAMPLER_DIM_SUBPASS; break; >> default: >> - vtn_fail("Invalid SPIR-V Sampler dimension"); >> + vtn_fail("Invalid SPIR-V image dimensionality"); >> } >> bool is_shadow = w[4]; >> @@ -1115,15 +1117,16 @@ vtn_handle_type(struct vtn_builder *b, SpvOp >> opcode, >> val->type->image_format = translate_image_format(b, format); >> + enum glsl_base_type sampled_base_type = >> + glsl_get_base_type(sampled_type->type); >> if (sampled == 1) { >>val->type->sampled = true; >>val->type->type = glsl_sampler_type(dim, is_shadow, is_array, >> - >> glsl_get_base_type(sampled_type)); >> + sampled_base_type); >> } else if (sampled == 2) { >>vtn_assert(!is_shadow); >>val->type->sampled = false; >> - val->type->type = glsl_image_type(dim, is_array, >> - glsl_get_base_type(sampled_ty >> pe)); >> + val->type->type = glsl_image_type(dim, is_array, >> sampled_base_type); >> } else { >>vtn_fail("We need to know if the image will be sampled"); >> } >> > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 104214] Dota crashes when switching from game to desktop
https://bugs.freedesktop.org/show_bug.cgi?id=104214 Mark Janes changed: What|Removed |Added CC||thellst...@vmware.com --- Comment #23 from Mark Janes --- Adding Thomas to the CC, since it bisects to his commit. -- You are receiving this mail because: You are the QA Contact for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Mesa 17.3.2 release candidate
On 8 January 2018 at 12:00, Eric Engestromwrote: > On Thursday, 2018-01-04 14:10:52 +, Emil Velikov wrote: >> Hello list, >> >> The release candidate for Mesa 17.3.2 is now available. Currently we have: >> - 13 queued >> - 18 nominated (outstanding) >> - and 0 rejected patches >> >> >> In the current queue we have: >> >> Multiple fixes in the RADV Vulkan driver, workaround when using slibtool and >> a GLSL workaround for various titles using Unreal Engine 4. >> >> Take a look at section "Mesa stable queue" for more information. >> >> >> Testing reports/general approval >> >> >> Any testing reports (or general approval of the state of the branch) >> will be greatly appreciated. >> >> The plan is to have 17.3.2 this Saturday (6th of January), around or >> shortly after 15:00 GMT. >> >> If you have any questions or suggestions - be that about the current >> patch queue or otherwise, please go ahead. >> >> >> Cheers, >> Emil >> >> >> Mesa stable queue >> - >> >> Nominated (18) >> == >> >> Bas Nieuwenhuizen (8): >> c39947ce304 radv: Invert condition for all samples identical >> during resolve. >> cebc9a119d4 radv: Flush caches before subpass resolve. >> 73279da41da radv: Fix fragment resolve destination offset. >> da192b50b24 radv: Use correct framebuffer size for partial FS resolves. >> a636208ace0 radv: Always use fragment resolve if dest uses DCC. >> 5158603182f radv: Use correct HTILE expanded words. >> 1c78e4f0535 radv: Allow writing 0 scissors. >> c99426ea831 ac/nir: Handle loading data from compact arrays. >> >> Dave Airlie (8): >> 38e4467e99a radv: fix events on compute queues. >> ec1edd0fd29 radv: fix pipeline statistics end query on compute queue >> d08f2678145 radv/gfx9: fix 3d image to image transfers on compute >> queues. >> 09612a62e16 radv/gfx9: fix 3d image clears on compute queues >> 420627e6e79 radv/gfx9: fix buffer to image for 3d images on compute >> queues >> 59515780433 radv/gfx9: fix block compression texture views. 
>> 868377ab33e radv/gfx9: use a bigger hammer to flush cb/db caches. >> a4c23ce1b68 radv/gfx9: use correct swizzle parameter to work out >> border swizzle. >> >> Jason Ekstrand (1): >> 18fde36ced4 intel/fs: Use the original destination region for >> int MUL lowering >> >> Lucas Stach (1): >> 01585659245 etnaviv: disable in-place resolve for non-supertiled >> surfaces >> >> >> Queued (13) >> === >> >> Bas Nieuwenhuizen (1): >> radv: Fix DCC compatible formats. >> >> Brendan King (1): >> egl: link libEGL against the dynamic version of libglapi >> >> Dave Airlie (6): >> radv/gfx9: add support for 3d images to blit 2d paths >> radv: handle depth/stencil image copy with layouts better. (v3.1) >> radv/meta: fix blit paths for depth/stencil (v2.1) >> radv: fix issue with multisample positions and interp_var_at_sample. >> radv/gfx9: add 3d sampler image->buffer copy shader. (v3) >> radv: don't do format replacement on tc compat htile surfaces. >> >> Eric Engestrom (1): >> egl: let each platform decided how to handle LIBGL_ALWAYS_SOFTWARE > > I broke Android in this commit, please include RobHer's > aa187fe7bfac856207a5 "egl/android: Fix build break with > dri2_initialize_android _EGLDisplay parameter" > Thanks Eric - I've pulled the fixup. Sadly ssh to people.fd.o fails, so releasing will be shortly delayed :-\ -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 3/4] i965: Pass brw_growing_bo to grow_buffer().
Cleaner. --- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 20 +--- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 877f68ee7cf..b4fcd92b6bd 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -288,16 +288,16 @@ replace_bo_in_reloc_list(struct brw_reloc_list *rlist, */ static void grow_buffer(struct brw_context *brw, -struct brw_bo **bo_ptr, -uint32_t **map_ptr, +struct brw_growing_bo *grow, unsigned existing_bytes, unsigned new_size) { struct intel_batchbuffer *batch = >batch; struct brw_bufmgr *bufmgr = brw->bufmgr; + struct brw_bo *bo = grow->bo; - uint32_t *old_map = *map_ptr; - struct brw_bo *old_bo = *bo_ptr; + uint32_t *old_map = grow->map; + struct brw_bo *old_bo = grow->bo; struct brw_bo *new_bo = brw_bo_alloc(bufmgr, old_bo->name, new_size, old_bo->align); @@ -307,7 +307,7 @@ grow_buffer(struct brw_context *brw, /* Copy existing data to the new larger buffer */ if (batch->use_shadow_copy) { - new_map = realloc(*map_ptr, new_size); + new_map = realloc(old_map, new_size); } else { new_map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE); memcpy(new_map, old_map, existing_bytes); @@ -353,8 +353,8 @@ grow_buffer(struct brw_context *brw, /* Drop the *bo_ptr reference. This should free the old BO. 
*/ brw_bo_unreference(old_bo); - *bo_ptr = new_bo; - *map_ptr = new_map; + grow->bo = new_bo; + grow->map = new_map; } void @@ -377,8 +377,7 @@ intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz, const unsigned new_size = MIN2(batch->batch.bo->size + batch->batch.bo->size / 2, MAX_BATCH_SIZE); - grow_buffer(brw, >batch.bo, >batch.map, - batch_used, new_size); + grow_buffer(brw, >batch, batch_used, new_size); batch->map_next = (void *) batch->batch.map + batch_used; assert(batch_used + sz < batch->batch.bo->size); } @@ -1079,8 +1078,7 @@ brw_state_batch(struct brw_context *brw, const unsigned new_size = MIN2(batch->state.bo->size + batch->state.bo->size / 2, MAX_STATE_SIZE); - grow_buffer(brw, >state.bo, >state.map, - batch->state_used, new_size); + grow_buffer(brw, >state, batch->state_used, new_size); assert(offset + size < batch->state.bo->size); } -- 2.15.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3 1/4] i965: Replace cpu_map pointers with a "use_shadow_copy" boolean.
Having a boolean for "we're using malloc'd shadow copies for all buffers" is cleaner than having a cpu_map pointer for each. It was okay when we had one buffer, but this is more obvious. --- src/mesa/drivers/dri/i965/brw_context.h | 2 +- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 39 +-- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 0f0aad85348..4d29e2ef082 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -473,7 +473,6 @@ struct brw_reloc_list { struct brw_growing_bo { struct brw_bo *bo; uint32_t *map; - uint32_t *cpu_map; }; struct intel_batchbuffer { @@ -492,6 +491,7 @@ struct intel_batchbuffer { uint32_t state_used; enum brw_gpu_ring ring; + bool use_shadow_copy; bool use_batch_first; bool needs_sol_reset; bool state_base_address_emitted; diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index a17e1699254..bfb50053e73 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -83,12 +83,12 @@ intel_batchbuffer_init(struct brw_context *brw) struct intel_batchbuffer *batch = >batch; const struct gen_device_info *devinfo = >devinfo; - if (!devinfo->has_llc) { - batch->batch.cpu_map = malloc(BATCH_SZ); - batch->batch.map = batch->batch.cpu_map; + batch->use_shadow_copy = !devinfo->has_llc; + + if (batch->use_shadow_copy) { + batch->batch.map = malloc(BATCH_SZ); batch->map_next = batch->batch.map; - batch->state.cpu_map = malloc(STATE_SZ); - batch->state.map = batch->state.cpu_map; + batch->state.map = malloc(STATE_SZ); } init_reloc_list(>batch_relocs, 250); @@ -174,7 +174,7 @@ intel_batchbuffer_reset(struct brw_context *brw) batch->last_bo = batch->batch.bo; batch->batch.bo = brw_bo_alloc(bufmgr, "batchbuffer", BATCH_SZ, 4096); - if (!batch->batch.cpu_map) { + if (!batch->use_shadow_copy) { 
batch->batch.map = brw_bo_map(brw, batch->batch.bo, MAP_READ | MAP_WRITE); } @@ -183,7 +183,7 @@ intel_batchbuffer_reset(struct brw_context *brw) batch->state.bo = brw_bo_alloc(bufmgr, "statebuffer", STATE_SZ, 4096); batch->state.bo->kflags = can_do_exec_capture(screen) ? EXEC_OBJECT_CAPTURE : 0; - if (!batch->state.cpu_map) { + if (!batch->use_shadow_copy) { batch->state.map = brw_bo_map(brw, batch->state.bo, MAP_READ | MAP_WRITE); } @@ -243,8 +243,10 @@ intel_batchbuffer_reset_to_saved(struct brw_context *brw) void intel_batchbuffer_free(struct intel_batchbuffer *batch) { - free(batch->batch.cpu_map); - free(batch->state.cpu_map); + if (batch->use_shadow_copy) { + free(batch->batch.map); + free(batch->state.map); + } for (int i = 0; i < batch->exec_count; i++) { brw_bo_unreference(batch->exec_bos[i]); @@ -284,7 +286,6 @@ static void grow_buffer(struct brw_context *brw, struct brw_bo **bo_ptr, uint32_t **map_ptr, -uint32_t **cpu_map_ptr, unsigned existing_bytes, unsigned new_size) { @@ -301,8 +302,8 @@ grow_buffer(struct brw_context *brw, perf_debug("Growing %s - ran out of space\n", old_bo->name); /* Copy existing data to the new larger buffer */ - if (*cpu_map_ptr) { - *cpu_map_ptr = new_map = realloc(*cpu_map_ptr, new_size); + if (batch->use_shadow_copy) { + new_map = realloc(*map_ptr, new_size); } else { new_map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE); memcpy(new_map, old_map, existing_bytes); @@ -373,7 +374,7 @@ intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz, MIN2(batch->batch.bo->size + batch->batch.bo->size / 2, MAX_BATCH_SIZE); grow_buffer(brw, >batch.bo, >batch.map, - >batch.cpu_map, batch_used, new_size); + batch_used, new_size); batch->map_next = (void *) batch->batch.map + batch_used; assert(batch_used + sz < batch->batch.bo->size); } @@ -806,14 +807,12 @@ submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd) struct intel_batchbuffer *batch = >batch; int ret = 0; - if (batch->batch.cpu_map) { + if 
(batch->use_shadow_copy) { void *bo_map = brw_bo_map(brw, batch->batch.bo, MAP_WRITE); - memcpy(bo_map, batch->batch.cpu_map, 4 * USED_BATCH(*batch)); - } + memcpy(bo_map, batch->batch.map, 4 * USED_BATCH(*batch)); - if (batch->state.cpu_map) { - void *bo_map = brw_bo_map(brw, batch->state.bo, MAP_WRITE); - memcpy(bo_map, batch->state.cpu_map, batch->state_used); + bo_map = brw_bo_map(brw, batch->state.bo, MAP_WRITE); + memcpy(bo_map, batch->state.map, batch->state_used); }
[Mesa-dev] [PATCH v3 4/4] i965: Avoid problems from referencing orphaned BOs after growing.
Growing the batch/state buffer is a lot more dangerous than I thought. A number of places emit multiple state buffer sections, and then write data to the returned pointer, or save a pointer to brw->batch.state.bo and then use it in relocations. If each call can grow, this can result in stale map references or stale BO pointers. Furthermore, fences refer to the old batch BO, and that reference needs to continue working. To avoid these woes, we avoid ever swapping the brw->batch.*.bo pointer, instead exchanging the brw_bo structures in place. That way, stale BO references are fine - the GEM handle changes, but the brw_bo pointer doesn't. We also defer the memcpy until a quiescent point, so callers can write to the returned pointer - which may be in either BO - and we'll sort it out and combine the two properly in the end. v2: - Handle stale pointers in the shadow copy case, where realloc may or may not move our shadow copy to a new address. - Track the partial map explicitly, to avoid problems with buffer reuse where multiple map modes exist (caught by Chris Wilson). Fixes: 2dfc119f22f257082ab0 "i965: Grow the batch/state buffers if we need space and can't flush." 
--- src/mesa/drivers/dri/i965/brw_context.h | 3 + src/mesa/drivers/dri/i965/intel_batchbuffer.c | 128 +- 2 files changed, 107 insertions(+), 24 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 4d29e2ef082..1167d04a32a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -473,6 +473,9 @@ struct brw_reloc_list { struct brw_growing_bo { struct brw_bo *bo; uint32_t *map; + struct brw_bo *partial_bo; + uint32_t *partial_bo_map; + unsigned partial_bytes; }; struct intel_batchbuffer { diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index b4fcd92b6bd..ca027ebff9f 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -171,6 +171,9 @@ recreate_growing_buffer(struct brw_context *brw, grow->bo = brw_bo_alloc(bufmgr, name, size, 4096); grow->bo->kflags = can_do_exec_capture(screen) ? EXEC_OBJECT_CAPTURE : 0; + grow->partial_bo = NULL; + grow->partial_bo_map = NULL; + grow->partial_bytes = 0; if (!batch->use_shadow_copy) grow->map = brw_bo_map(brw, grow->bo, MAP_READ | MAP_WRITE); @@ -267,6 +270,32 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch) _mesa_hash_table_destroy(batch->state_batch_sizes, NULL); } +/** + * Finish copying the old batch/state buffer's contents to the new one + * after we tried to "grow" the buffer in an earlier operation. + */ +static void +finish_growing_bos(struct brw_growing_bo *grow) +{ + struct brw_bo *old_bo = grow->partial_bo; + if (!old_bo) + return; + + /* If using a shadow copy, realloc may have returned the same pointer +* when growing, so there's no need to memcpy. Or it might have moved, +* at which point we need to copy. 
+*/ + if (grow->map != grow->partial_bo_map) { + memcpy(grow->map, grow->partial_bo_map, grow->partial_bytes); + } + + grow->partial_bo = NULL; + grow->partial_bo_map = NULL; + grow->partial_bytes = 0; + + brw_bo_unreference(old_bo); +} + static void replace_bo_in_reloc_list(struct brw_reloc_list *rlist, uint32_t old_handle, uint32_t new_handle) @@ -296,21 +325,26 @@ grow_buffer(struct brw_context *brw, struct brw_bufmgr *bufmgr = brw->bufmgr; struct brw_bo *bo = grow->bo; - uint32_t *old_map = grow->map; - struct brw_bo *old_bo = grow->bo; + perf_debug("Growing %s - ran out of space\n", bo->name); - struct brw_bo *new_bo = - brw_bo_alloc(bufmgr, old_bo->name, new_size, old_bo->align); - uint32_t *new_map; + if (grow->partial_bo) { + /* We've already grown once, and now we need to do it again. + * Finish our last grow operation so we can start a new one. + * This should basically never happen. + */ + perf_debug("Had to grow multiple times"); + finish_growing_bos(grow); + } - perf_debug("Growing %s - ran out of space\n", old_bo->name); + struct brw_bo *new_bo = brw_bo_alloc(bufmgr, bo->name, new_size, bo->align); /* Copy existing data to the new larger buffer */ + grow->partial_bo_map = grow->map; + if (batch->use_shadow_copy) { - new_map = realloc(old_map, new_size); + grow->map = realloc(grow->map, new_size); } else { - new_map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE); - memcpy(new_map, old_map, existing_bytes); + grow->map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE); } /* Try to put the new BO at the same GTT offset as the old BO (which @@ -322,21 +356,18 @@ grow_buffer(struct brw_context *brw, * * Also preserve kflags for EXEC_OBJECT_CAPTURE. */ -
[Mesa-dev] [PATCH v3 2/4] i965: Make a helper for recreating growing buffers.
Now that we have two of these, we're duplicating a bunch of this logic. The next commit will add more logic, which would make the duplication seem worse. This ends up setting EXEC_OBJECT_CAPTURE on the batch, which isn't necessary (it's already captured), but it should be harmless. --- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 30 +++ 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index bfb50053e73..877f68ee7cf 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -161,32 +161,36 @@ add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo) } static void -intel_batchbuffer_reset(struct brw_context *brw) +recreate_growing_buffer(struct brw_context *brw, +struct brw_growing_bo *grow, +const char *name, unsigned size) { struct intel_screen *screen = brw->screen; struct intel_batchbuffer *batch = >batch; struct brw_bufmgr *bufmgr = screen->bufmgr; + grow->bo = brw_bo_alloc(bufmgr, name, size, 4096); + grow->bo->kflags = can_do_exec_capture(screen) ? EXEC_OBJECT_CAPTURE : 0; + + if (!batch->use_shadow_copy) + grow->map = brw_bo_map(brw, grow->bo, MAP_READ | MAP_WRITE); +} + +static void +intel_batchbuffer_reset(struct brw_context *brw) +{ + struct intel_batchbuffer *batch = >batch; + if (batch->last_bo != NULL) { brw_bo_unreference(batch->last_bo); batch->last_bo = NULL; } batch->last_bo = batch->batch.bo; - batch->batch.bo = brw_bo_alloc(bufmgr, "batchbuffer", BATCH_SZ, 4096); - if (!batch->use_shadow_copy) { - batch->batch.map = - brw_bo_map(brw, batch->batch.bo, MAP_READ | MAP_WRITE); - } + recreate_growing_buffer(brw, >batch, "batchbuffer", BATCH_SZ); batch->map_next = batch->batch.map; - batch->state.bo = brw_bo_alloc(bufmgr, "statebuffer", STATE_SZ, 4096); - batch->state.bo->kflags = - can_do_exec_capture(screen) ? 
EXEC_OBJECT_CAPTURE : 0; - if (!batch->use_shadow_copy) { - batch->state.map = - brw_bo_map(brw, batch->state.bo, MAP_READ | MAP_WRITE); - } + recreate_growing_buffer(brw, >state, "statebuffer", STATE_SZ); /* Avoid making 0 a valid state offset - otherwise the decoder will try * and decode data when we use offset 0 as a null pointer. -- 2.15.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 15/15] radeonsi: implement 32-bit pointers in user data SGPRs
On 01/06/2018 12:12 PM, Marek Olšák wrote: From: Marek OlšákSGPRS: 2170102 -> 2158430 (-0.54 %) VGPRS: 1645656 -> 1641516 (-0.25 %) Spilled SGPRs: 9078 -> 8810 (-2.95 %) Spilled VGPRs: 130 -> 114 (-12.31 %) Scratch size: 1508 -> 1492 (-1.06 %) dwords per thread Code Size: 52094872 -> 52692540 (1.15 %) bytes These numbers are quite nice, great work! I think it's something I would like to implement for RADV. Just one minor nitpick below. Reviewed-by: Samuel Pitoiset --- src/amd/common/ac_llvm_build.c| 13 +++ src/amd/common/ac_llvm_build.h| 5 + src/gallium/drivers/radeonsi/si_descriptors.c | 10 +- src/gallium/drivers/radeonsi/si_shader.c | 115 +- src/gallium/drivers/radeonsi/si_shader.h | 23 - src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 6 +- 6 files changed, 122 insertions(+), 50 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index ed00d20..02d1b39 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -57,20 +57,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context, ctx->context = context; ctx->module = NULL; ctx->builder = NULL; ctx->voidt = LLVMVoidTypeInContext(ctx->context); ctx->i1 = LLVMInt1TypeInContext(ctx->context); ctx->i8 = LLVMInt8TypeInContext(ctx->context); ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); + ctx->intptr = HAVE_32BIT_POINTERS ? 
ctx->i32 : ctx->i64; ctx->f16 = LLVMHalfTypeInContext(ctx->context); ctx->f32 = LLVMFloatTypeInContext(ctx->context); ctx->f64 = LLVMDoubleTypeInContext(ctx->context); ctx->v2i16 = LLVMVectorType(ctx->i16, 2); ctx->v2i32 = LLVMVectorType(ctx->i32, 2); ctx->v3i32 = LLVMVectorType(ctx->i32, 3); ctx->v4i32 = LLVMVectorType(ctx->i32, 4); ctx->v2f32 = LLVMVectorType(ctx->f32, 2); ctx->v4f32 = LLVMVectorType(ctx->f32, 4); ctx->v8i32 = LLVMVectorType(ctx->i32, 8); @@ -128,21 +129,24 @@ unsigned ac_get_type_size(LLVMTypeRef type) { LLVMTypeKind kind = LLVMGetTypeKind(type); switch (kind) { case LLVMIntegerTypeKind: return LLVMGetIntTypeWidth(type) / 8; case LLVMFloatTypeKind: return 4; case LLVMDoubleTypeKind: + return 8; case LLVMPointerTypeKind: + if (LLVMGetPointerAddressSpace(type) == AC_CONST_32BIT_ADDR_SPACE) + return 4; return 8; case LLVMVectorTypeKind: return LLVMGetVectorSize(type) * ac_get_type_size(LLVMGetElementType(type)); case LLVMArrayTypeKind: return LLVMGetArrayLength(type) * ac_get_type_size(LLVMGetElementType(type)); default: assert(0); return 0; @@ -2035,10 +2039,19 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMIntEQ, src0, ctx->i32_0, ""), LLVMConstInt(ctx->i32, -1, 0), lsb, ""); } LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type) { return LLVMPointerType(LLVMArrayType(elem_type, 0), AC_CONST_ADDR_SPACE); } + +LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type) +{ + if (!HAVE_32BIT_POINTERS) + return ac_array_in_const_addr_space(elem_type); + + return LLVMPointerType(LLVMArrayType(elem_type, 0), + AC_CONST_32BIT_ADDR_SPACE); +} diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index b1c4737..5235664 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -27,36 +27,40 @@ #include #include #include "amd_family.h" #ifdef __cplusplus extern "C" { #endif +#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0600) + enum { AC_CONST_ADDR_SPACE = 2, /* CONST is the only 
address space that selects SMEM loads */ AC_LOCAL_ADDR_SPACE = 3, + AC_CONST_32BIT_ADDR_SPACE = 6, /* same as CONST, but the pointer type has 32 bits */ }; struct ac_llvm_context { LLVMContextRef context; LLVMModuleRef module; LLVMBuilderRef builder; LLVMTypeRef voidt; LLVMTypeRef i1; LLVMTypeRef i8; LLVMTypeRef i16; LLVMTypeRef i32; LLVMTypeRef i64; + LLVMTypeRef intptr; LLVMTypeRef f16; LLVMTypeRef f32; LLVMTypeRef f64; LLVMTypeRef v2i16; LLVMTypeRef v2i32;
Re: [Mesa-dev] [PATCH] spirv: Import 1.2 rev 3 headers from Khronos
We also need to update spirv.core.grammar.json at the same time. I've gone ahead and pulled the latest and squashed it into your patch. I also added Reviewed-by: Jason Ekstrand and pushed. Thanks! On Mon, Jan 8, 2018 at 10:46 AM, Caio Marcelo de Oliveira Filho < caio.olive...@intel.com> wrote: > --- > src/compiler/spirv/spirv.h | 27 --- > 1 file changed, 24 insertions(+), 3 deletions(-) > > diff --git a/src/compiler/spirv/spirv.h b/src/compiler/spirv/spirv.h > index 61559a1c9c..233dc6d691 100644 > --- a/src/compiler/spirv/spirv.h > +++ b/src/compiler/spirv/spirv.h > @@ -1,5 +1,5 @@ > /* > -** Copyright (c) 2014-2017 The Khronos Group Inc. > +** Copyright (c) 2014-2018 The Khronos Group Inc. > ** > ** Permission is hereby granted, free of charge, to any person obtaining > a copy > ** of this software and/or associated documentation files (the > "Materials"), > @@ -51,11 +51,11 @@ > typedef unsigned int SpvId; > > #define SPV_VERSION 0x10200 > -#define SPV_REVISION 1 > +#define SPV_REVISION 3 > > static const unsigned int SpvMagicNumber = 0x07230203; > static const unsigned int SpvVersion = 0x00010200; > -static const unsigned int SpvRevision = 1; > +static const unsigned int SpvRevision = 3; > static const unsigned int SpvOpCodeMask = 0xffff; > static const unsigned int SpvWordCountShift = 16; > > @@ -134,6 +134,7 @@ typedef enum SpvExecutionMode_ { > SpvExecutionModeLocalSizeId = 38, > SpvExecutionModeLocalSizeHintId = 39, > SpvExecutionModePostDepthCoverage = 4446, > +SpvExecutionModeStencilRefReplacingEXT = 5027, > SpvExecutionModeMax = 0x7fffffff, > } SpvExecutionMode; > > @@ -458,11 +459,13 @@ typedef enum SpvBuiltIn_ { > SpvBuiltInBaryCoordSmoothCentroidAMD = 4996, > SpvBuiltInBaryCoordSmoothSampleAMD = 4997, > SpvBuiltInBaryCoordPullModelAMD = 4998, > +SpvBuiltInFragStencilRefEXT = 5014, > SpvBuiltInViewportMaskNV = 5253, > SpvBuiltInSecondaryPositionNV = 5257, > SpvBuiltInSecondaryViewportMaskNV = 5258, > SpvBuiltInPositionPerViewNV = 5261, >
SpvBuiltInViewportMaskPerViewNV = 5262, > +SpvBuiltInFullyCoveredEXT = 5264, > SpvBuiltInMax = 0x7fffffff, > } SpvBuiltIn; > > @@ -661,12 +664,20 @@ typedef enum SpvCapability_ { > SpvCapabilityAtomicStorageOps = 4445, > SpvCapabilitySampleMaskPostDepthCoverage = 4447, > SpvCapabilityImageGatherBiasLodAMD = 5009, > +SpvCapabilityFragmentMaskAMD = 5010, > +SpvCapabilityStencilExportEXT = 5013, > +SpvCapabilityImageReadWriteLodAMD = 5015, > SpvCapabilitySampleMaskOverrideCoverageNV = 5249, > SpvCapabilityGeometryShaderPassthroughNV = 5251, > +SpvCapabilityShaderViewportIndexLayerEXT = 5254, > SpvCapabilityShaderViewportIndexLayerNV = 5254, > SpvCapabilityShaderViewportMaskNV = 5255, > SpvCapabilityShaderStereoViewNV = 5259, > SpvCapabilityPerViewAttributesNV = 5260, > +SpvCapabilityFragmentFullyCoveredEXT = 5265, > +SpvCapabilitySubgroupShuffleINTEL = 5568, > +SpvCapabilitySubgroupBufferBlockIOINTEL = 5569, > +SpvCapabilitySubgroupImageBlockIOINTEL = 5570, > SpvCapabilityMax = 0x7fffffff, > } SpvCapability; > > @@ -991,6 +1002,16 @@ typedef enum SpvOp_ { > SpvOpGroupFMaxNonUniformAMD = 5005, > SpvOpGroupUMaxNonUniformAMD = 5006, > SpvOpGroupSMaxNonUniformAMD = 5007, > +SpvOpFragmentMaskFetchAMD = 5011, > +SpvOpFragmentFetchAMD = 5012, > +SpvOpSubgroupShuffleINTEL = 5571, > +SpvOpSubgroupShuffleDownINTEL = 5572, > +SpvOpSubgroupShuffleUpINTEL = 5573, > +SpvOpSubgroupShuffleXorINTEL = 5574, > +SpvOpSubgroupBlockReadINTEL = 5575, > +SpvOpSubgroupBlockWriteINTEL = 5576, > +SpvOpSubgroupImageBlockReadINTEL = 5577, > +SpvOpSubgroupImageBlockWriteINTEL = 5578, > SpvOpMax = 0x7fffffff, > } SpvOp; > > -- > 2.15.1 > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 14/15] ac: place amdgpu.uniform on loads instead of GEPs
On 01/06/2018 12:12 PM, Marek Olšák wrote: From: Marek Olšák --- src/amd/common/ac_llvm_build.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 164f310..ed00d20 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -775,25 +775,28 @@ ac_build_indexed_store(struct ac_llvm_context *ctx, * dynamically uniform (i.e. load to an SGPR) * \param invariant Whether the load is invariant (no other opcodes affect it) */ static LLVMValueRef ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index, bool uniform, bool invariant) { LLVMValueRef pointer, result; pointer = ac_build_gep0(ctx, base_ptr, index); - if (uniform) + /* This will be removed by InstCombine if index == 0. */ + if (HAVE_LLVM < 0x0600 && uniform) IMHO, it would be better to add more explanations here. Also, I would like to test 11, so for now 12-14 are: Reviewed-by: Samuel Pitoiset LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md); result = LLVMBuildLoad(ctx->builder, pointer, ""); if (invariant) LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md); + if (HAVE_LLVM >= 0x0600 && uniform) + LLVMSetMetadata(result, ctx->uniform_md_kind, ctx->empty_md); return result; } LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index) { return ac_build_load_custom(ctx, base_ptr, index, false, false); } LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx, ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] tgsi: include struct definitions for tgsi_build declarations
Many of the functions declared in tgsi_build.h return structs (not struct pointers). Therefore the full struct definitions are needed to avoid warnings or errors: In file included from src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp:23: external/mesa3d/src/gallium/auxiliary/tgsi/tgsi_build.h:47:1: error: 'tgsi_build_header' has C-linkage specified, but returns incomplete type 'struct tgsi_header' which could be incompatible with C [-Werror,-Wreturn-type-c-linkage] This error shows up on Android builds using clang and -Werror. Cc: Ilia Mirkin Signed-off-by: Rob Herring --- src/gallium/auxiliary/tgsi/tgsi_build.h | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 53f31932c021..053292957211 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -28,11 +28,7 @@ #ifndef TGSI_BUILD_H #define TGSI_BUILD_H - -struct tgsi_token; -struct tgsi_full_dst_register; -struct tgsi_full_src_register; - +#include #if defined __cplusplus extern "C" { -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 10/15] radeonsi: disallow constant buffers with a 64-bit address in slot 0
9-10: Reviewed-by: Samuel Pitoiset On 01/06/2018 12:12 PM, Marek Olšák wrote: From: Marek Olšák State trackers must use a user buffer or const_uploader, or set pipe_resource::flags same as const_uploader->flags. --- src/gallium/drivers/radeonsi/si_descriptors.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 17115e1..b372090 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1207,20 +1207,26 @@ void si_set_rw_buffer(struct si_context *sctx, static void si_pipe_set_constant_buffer(struct pipe_context *ctx, enum pipe_shader_type shader, uint slot, const struct pipe_constant_buffer *input) { struct si_context *sctx = (struct si_context *)ctx; if (shader >= SI_NUM_SHADERS) return; + if (slot == 0 && input && input->buffer && + !(r600_resource(input->buffer)->flags & RADEON_FLAG_32BIT)) { + assert(!"constant buffer 0 must have a 32-bit VM address, use const_uploader"); + return; + } + slot = si_get_constbuf_slot(slot); si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), slot, input); } void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, uint slot, struct pipe_constant_buffer *cbuf) { cbuf->user_buffer = NULL; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 08/15] winsys/radeon: implement and enable 32-bit VM allocations
Patches 5-8 are: Reviewed-by: Samuel PitoisetOn 01/06/2018 12:12 PM, Marek Olšák wrote: From: Marek Olšák --- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 42 +++ src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 28 ++- src/gallium/winsys/radeon/drm/radeon_drm_winsys.h | 2 ++ 3 files changed, 64 insertions(+), 8 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index bbfe5cc..06842a4 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -242,32 +242,54 @@ static uint64_t radeon_bomgr_find_va(const struct radeon_info *info, if ((hole->size - waste) == size) { hole->size = waste; mtx_unlock(>mutex); return offset; } } offset = heap->start; waste = offset % alignment; waste = waste ? alignment - waste : 0; + +if (offset + waste + size > heap->end) { +mtx_unlock(>mutex); +return 0; +} + if (waste) { n = CALLOC_STRUCT(radeon_bo_va_hole); n->size = waste; n->offset = offset; list_add(>list, >holes); } offset += waste; heap->start += size + waste; mtx_unlock(>mutex); return offset; } +static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws, + uint64_t size, uint64_t alignment) +{ +uint64_t va = 0; + +/* Try to allocate from the 64-bit address space first. + * If it doesn't exist (start = 0) or if it doesn't have enough space, + * fall back to the 32-bit address space. 
+ */ +if (ws->vm64.start) +va = radeon_bomgr_find_va(>info, >vm64, size, alignment); +if (!va) +va = radeon_bomgr_find_va(>info, >vm32, size, alignment); +return va; +} + static void radeon_bomgr_free_va(const struct radeon_info *info, struct radeon_vm_heap *heap, uint64_t va, uint64_t size) { struct radeon_bo_va_hole *hole = NULL; size = align(size, info->gart_page_size); mtx_lock(>mutex); if ((va + size) == heap->start) { @@ -363,21 +385,23 @@ void radeon_bo_destroy(struct pb_buffer *_buf) if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, , sizeof(va)) != 0 && va.operation == RADEON_VA_RESULT_ERROR) { fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n"); fprintf(stderr, "radeon:size : %"PRIu64" bytes\n", bo->base.size); fprintf(stderr, "radeon:va: 0x%"PRIx64"\n", bo->va); } } - radeon_bomgr_free_va(>info, >vm64, bo->va, bo->base.size); + radeon_bomgr_free_va(>info, + bo->va < rws->vm32.end ? >vm32 : >vm64, + bo->va, bo->base.size); } /* Close object. */ args.handle = bo->handle; drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, ); mtx_destroy(>u.real.map_mutex); if (bo->initial_domain & RADEON_DOMAIN_VRAM) rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size); @@ -653,22 +677,28 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws, if (heap >= 0) { pb_cache_init_entry(>bo_cache, >u.real.cache_entry, >base, heap); } if (rws->info.has_virtual_memory) { struct drm_radeon_gem_va va; unsigned va_gap_size; va_gap_size = rws->check_vm ? 
MAX2(4 * alignment, 64 * 1024) : 0; -bo->va = radeon_bomgr_find_va(>info, >vm64, - size + va_gap_size, alignment); + +if (flags & RADEON_FLAG_32BIT) { +bo->va = radeon_bomgr_find_va(>info, >vm32, + size + va_gap_size, alignment); +assert(bo->va + size < rws->vm32.end); +} else { +bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment); +} va.handle = bo->handle; va.vm_id = 0; va.operation = RADEON_VA_MAP; va.flags = RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_WRITEABLE | RADEON_VM_PAGE_SNOOPED; va.offset = bo->va; r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, , sizeof(va)); if (r && va.operation == RADEON_VA_RESULT_ERROR) { @@ -1055,22 +1085,21 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws, bo->hash = __sync_fetch_and_add(>next_bo_hash, 1); (void) mtx_init(>u.real.map_mutex, mtx_plain); util_hash_table_set(ws->bo_handles,
Re: [Mesa-dev] [PATCH 04/15] pb_cache: let drivers choose the number of buckets
On 01/06/2018 12:12 PM, Marek Olšák wrote: From: Marek Olšák--- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_cache.c| 20 src/gallium/auxiliary/pipebuffer/pb_cache.h| 6 -- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 1 - src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 3 ++- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 1 - src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 3 ++- 7 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 24831f6..4e70048 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -297,16 +297,16 @@ pb_cache_manager_create(struct pb_manager *provider, return NULL; mgr = CALLOC_STRUCT(pb_cache_manager); if (!mgr) return NULL; mgr->base.destroy = pb_cache_manager_destroy; mgr->base.create_buffer = pb_cache_manager_create_buffer; mgr->base.flush = pb_cache_manager_flush; mgr->provider = provider; - pb_cache_init(>cache, usecs, size_factor, bypass_usage, + pb_cache_init(>cache, 1, usecs, size_factor, bypass_usage, maximum_cache_size, _pb_cache_buffer_destroy, pb_cache_can_reclaim_buffer); return >base; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_cache.c b/src/gallium/auxiliary/pipebuffer/pb_cache.c index dd479ae..af899a2 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_cache.c @@ -85,21 +85,21 @@ pb_cache_add_buffer(struct pb_cache_entry *entry) struct pb_cache *mgr = entry->mgr; struct list_head *cache = >buckets[entry->bucket_index]; struct pb_buffer *buf = entry->buffer; unsigned i; mtx_lock(>mutex); assert(!pipe_is_referenced(>reference)); int64_t current_time = os_time_get(); - for (i = 0; i < ARRAY_SIZE(mgr->buckets); i++) + for (i = 0; i < mgr->num_heaps; i++) release_expired_buffers_locked(>buckets[i], current_time); /* Directly release any buffer 
that exceeds the limit. */ if (mgr->cache_size + buf->size > mgr->max_cache_size) { mgr->destroy_buffer(buf); mtx_unlock(>mutex); return; } entry->start = os_time_get(); @@ -146,20 +146,22 @@ pb_cache_is_buffer_compat(struct pb_cache_entry *entry, struct pb_buffer * pb_cache_reclaim_buffer(struct pb_cache *mgr, pb_size size, unsigned alignment, unsigned usage, unsigned bucket_index) { struct pb_cache_entry *entry; struct pb_cache_entry *cur_entry; struct list_head *cur, *next; int64_t now; int ret = 0; + + assert(bucket_index < mgr->num_heaps); struct list_head *cache = >buckets[bucket_index]; mtx_lock(>mutex); entry = NULL; cur = cache->next; next = cur->next; /* search in the expired buffers, freeing them in the process */ now = os_time_get(); @@ -222,39 +224,41 @@ pb_cache_reclaim_buffer(struct pb_cache *mgr, pb_size size, * Empty the cache. Useful when there is not enough memory. */ void pb_cache_release_all_buffers(struct pb_cache *mgr) { struct list_head *curr, *next; struct pb_cache_entry *buf; unsigned i; mtx_lock(>mutex); - for (i = 0; i < ARRAY_SIZE(mgr->buckets); i++) { + for (i = 0; i < mgr->num_heaps; i++) { struct list_head *cache = >buckets[i]; curr = cache->next; next = curr->next; while (curr != cache) { buf = LIST_ENTRY(struct pb_cache_entry, curr, head); destroy_buffer_locked(buf); curr = next; next = curr->next; } } mtx_unlock(>mutex); } void pb_cache_init_entry(struct pb_cache *mgr, struct pb_cache_entry *entry, struct pb_buffer *buf, unsigned bucket_index) { + assert(bucket_index < mgr->num_heaps); + memset(entry, 0, sizeof(*entry)); entry->buffer = buf; entry->mgr = mgr; entry->bucket_index = bucket_index; } /** * Initialize a caching buffer manager. * * @param mgr The cache buffer manager @@ -263,40 +267,48 @@ pb_cache_init_entry(struct pb_cache *mgr, struct pb_cache_entry *entry, * @param size_factor Declare buffers that are size_factor times bigger than * the requested size as cache hits. * @param bypass_usage Bitmask. 
If (requested usage & bypass_usage) != 0, * buffer allocation requests are rejected. * @param maximum_cache_size Maximum size of all unused buffers the cache can *hold. * @param destroy_buffer Function that destroys a buffer for
Re: [Mesa-dev] [PATCH 00/11] SPIR-V More error checking/validation improvements
I have a nit for patch 6 and a question on patch 9; maybe there is something to change on that one. Otherwise this is: Reviewed-by: Lionel Landwerlin On 17/12/17 05:46, Jason Ekstrand wrote: This series is another 11 patches in the direction of improving SPIR-V error messages and validation. The purpose here isn't really to validate the SPIR-V (I'm sure there will be many things we won't error on) but to make it such that the assumptions we do make about the incoming SPIR-V and require for correctness are well-documented and based on actual SPIR-V constructs instead of obscure asserts deep inside spirv_to_nir. A good example of this is the penultimate patch of this series which reworks our error checking for decorations. The biggest potential failure point for decorations is struct member decorations which are either not on an OpTypeStruct or have a member literal that is out-of-bounds. By carefully checking for all this up-front, we can allow later code to just assume everything is sane. It's also theoretically possible to construct (invalid) SPIR-V which has nested member decorations. This isn't allowed by the spec but it's nicer to throw an error than to just get confused or, even worse, crash. The final patch in the series follows this up by replacing a bunch of vtn_asserts in various bits of decoration handling code with regular debug-only asserts that should now be guaranteed to never trigger.
Jason Ekstrand (11): spirv: Add better error messages in vtn_value helpers spirv: Make 'info' a local array spirv_info_c.py spirv/info: Add spirv_op_to_string spirv: Unify boolean constants and add better validation spirv: Remove a pointless assignment in SpvOpSpecConstant spirv: Add better validation to Op[Spec]Constant spirv: Refactor Op[Spec]ConstantComposite and add better validation spirv: Switch on vtn_base_type in OpComposite(Extract|Insert) spirv: Add better type validation to OpTypeImage spirv: Rework error checking for decorations spirv: Rework asserts in var_decoration_cb src/compiler/spirv/spirv_info.h| 1 + src/compiler/spirv/spirv_info_c.py | 18 ++- src/compiler/spirv/spirv_to_nir.c | 225 - src/compiler/spirv/vtn_private.h | 26 +++-- src/compiler/spirv/vtn_variables.c | 15 +-- 5 files changed, 163 insertions(+), 122 deletions(-) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] spirv: Import 1.2 rev 3 headers from Khronos
--- src/compiler/spirv/spirv.h | 27 --- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/compiler/spirv/spirv.h b/src/compiler/spirv/spirv.h index 61559a1c9c..233dc6d691 100644 --- a/src/compiler/spirv/spirv.h +++ b/src/compiler/spirv/spirv.h @@ -1,5 +1,5 @@ /* -** Copyright (c) 2014-2017 The Khronos Group Inc. +** Copyright (c) 2014-2018 The Khronos Group Inc. ** ** Permission is hereby granted, free of charge, to any person obtaining a copy ** of this software and/or associated documentation files (the "Materials"), @@ -51,11 +51,11 @@ typedef unsigned int SpvId; #define SPV_VERSION 0x10200 -#define SPV_REVISION 1 +#define SPV_REVISION 3 static const unsigned int SpvMagicNumber = 0x07230203; static const unsigned int SpvVersion = 0x00010200; -static const unsigned int SpvRevision = 1; +static const unsigned int SpvRevision = 3; static const unsigned int SpvOpCodeMask = 0x; static const unsigned int SpvWordCountShift = 16; @@ -134,6 +134,7 @@ typedef enum SpvExecutionMode_ { SpvExecutionModeLocalSizeId = 38, SpvExecutionModeLocalSizeHintId = 39, SpvExecutionModePostDepthCoverage = 4446, +SpvExecutionModeStencilRefReplacingEXT = 5027, SpvExecutionModeMax = 0x7fff, } SpvExecutionMode; @@ -458,11 +459,13 @@ typedef enum SpvBuiltIn_ { SpvBuiltInBaryCoordSmoothCentroidAMD = 4996, SpvBuiltInBaryCoordSmoothSampleAMD = 4997, SpvBuiltInBaryCoordPullModelAMD = 4998, +SpvBuiltInFragStencilRefEXT = 5014, SpvBuiltInViewportMaskNV = 5253, SpvBuiltInSecondaryPositionNV = 5257, SpvBuiltInSecondaryViewportMaskNV = 5258, SpvBuiltInPositionPerViewNV = 5261, SpvBuiltInViewportMaskPerViewNV = 5262, +SpvBuiltInFullyCoveredEXT = 5264, SpvBuiltInMax = 0x7fff, } SpvBuiltIn; @@ -661,12 +664,20 @@ typedef enum SpvCapability_ { SpvCapabilityAtomicStorageOps = 4445, SpvCapabilitySampleMaskPostDepthCoverage = 4447, SpvCapabilityImageGatherBiasLodAMD = 5009, +SpvCapabilityFragmentMaskAMD = 5010, +SpvCapabilityStencilExportEXT = 5013, +SpvCapabilityImageReadWriteLodAMD = 5015, 
SpvCapabilitySampleMaskOverrideCoverageNV = 5249, SpvCapabilityGeometryShaderPassthroughNV = 5251, +SpvCapabilityShaderViewportIndexLayerEXT = 5254, SpvCapabilityShaderViewportIndexLayerNV = 5254, SpvCapabilityShaderViewportMaskNV = 5255, SpvCapabilityShaderStereoViewNV = 5259, SpvCapabilityPerViewAttributesNV = 5260, +SpvCapabilityFragmentFullyCoveredEXT = 5265, +SpvCapabilitySubgroupShuffleINTEL = 5568, +SpvCapabilitySubgroupBufferBlockIOINTEL = 5569, +SpvCapabilitySubgroupImageBlockIOINTEL = 5570, SpvCapabilityMax = 0x7fff, } SpvCapability; @@ -991,6 +1002,16 @@ typedef enum SpvOp_ { SpvOpGroupFMaxNonUniformAMD = 5005, SpvOpGroupUMaxNonUniformAMD = 5006, SpvOpGroupSMaxNonUniformAMD = 5007, +SpvOpFragmentMaskFetchAMD = 5011, +SpvOpFragmentFetchAMD = 5012, +SpvOpSubgroupShuffleINTEL = 5571, +SpvOpSubgroupShuffleDownINTEL = 5572, +SpvOpSubgroupShuffleUpINTEL = 5573, +SpvOpSubgroupShuffleXorINTEL = 5574, +SpvOpSubgroupBlockReadINTEL = 5575, +SpvOpSubgroupBlockWriteINTEL = 5576, +SpvOpSubgroupImageBlockReadINTEL = 5577, +SpvOpSubgroupImageBlockWriteINTEL = 5578, SpvOpMax = 0x7fff, } SpvOp; -- 2.15.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 09/11] spirv: Add better type validation to OpTypeImage
On 17/12/17 05:46, Jason Ekstrand wrote: --- src/compiler/spirv/spirv_to_nir.c | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 83c75c7..ffea442 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1075,10 +1075,12 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, case SpvOpTypeImage: { val->type->base_type = vtn_base_type_image; - const struct glsl_type *sampled_type = - vtn_value(b, w[2], vtn_value_type_type)->type->type; + const struct vtn_type *sampled_type = + vtn_value(b, w[2], vtn_value_type_type)->type; - vtn_assert(glsl_type_is_vector_or_scalar(sampled_type)); + vtn_fail_if(sampled_type->base_type != vtn_base_type_scalar || + glsl_get_bit_size(sampled_type->type) != 32, + "Sampled type of OpTypeImage must be a 32-bit scalar"); Maybe I'm missing something, but the 1.3 spec says : "Sampled Type is the type of the components that result from sampling or reading from this image type. Must be a scalar numerical type or OpTypeVoid." "Numerical type: An integer type or a floating-point type." "Floating-point type: Any width type from OpTypeFloat." And OpTypeFloat can have variable width (like 16bits), so I'm not too sure about != 32 for the fail_if() above. 
enum glsl_sampler_dim dim; switch ((SpvDim)w[3]) { @@ -1090,7 +1092,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; case SpvDimSubpassData: dim = GLSL_SAMPLER_DIM_SUBPASS; break; default: - vtn_fail("Invalid SPIR-V Sampler dimension"); + vtn_fail("Invalid SPIR-V image dimensionality"); } bool is_shadow = w[4]; @@ -1115,15 +1117,16 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, val->type->image_format = translate_image_format(b, format); + enum glsl_base_type sampled_base_type = + glsl_get_base_type(sampled_type->type); if (sampled == 1) { val->type->sampled = true; val->type->type = glsl_sampler_type(dim, is_shadow, is_array, - glsl_get_base_type(sampled_type)); + sampled_base_type); } else if (sampled == 2) { vtn_assert(!is_shadow); val->type->sampled = false; - val->type->type = glsl_image_type(dim, is_array, - glsl_get_base_type(sampled_type)); + val->type->type = glsl_image_type(dim, is_array, sampled_base_type); } else { vtn_fail("We need to know if the image will be sampled"); } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/11] spirv: Add better validation to Op[Spec]Constant
On 17/12/17 05:46, Jason Ekstrand wrote: --- src/compiler/spirv/spirv_to_nir.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 208ec95..d65c0d0 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1300,6 +1300,9 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, } case SpvOpConstant: { + vtn_fail_if(val->type->base_type != vtn_base_type_scalar, + "Result type of %s must be a scalar", + spirv_op_to_string(opcode)); vtn_assert(glsl_type_is_scalar(val->type->type)); I guess you can drop the vtn_assert() above. int bit_size = glsl_get_bit_size(val->type->type); switch (bit_size) { @@ -1317,8 +1320,11 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, } break; } + case SpvOpSpecConstant: { - vtn_assert(glsl_type_is_scalar(val->type->type)); + vtn_fail_if(val->type->base_type != vtn_base_type_scalar, + "Result type of %s must be a scalar", + spirv_op_to_string(opcode)); int bit_size = glsl_get_bit_size(val->type->type); switch (bit_size) { case 64: ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] dri_util: remove ALLOW_RGB10_CONFIGS option
From: Marek Olšák This is unused because it's for libGL/libEGL, not drivers. --- src/mesa/drivers/dri/common/dri_util.c | 4 1 file changed, 4 deletions(-) diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index d4fba0b..e6a7d23 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -48,24 +48,20 @@ #include "main/version.h" #include "main/debug_output.h" #include "main/errors.h" #include "main/macros.h" const char __dri2ConfigOptions[] = DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_1) DRI_CONF_SECTION_END - - DRI_CONF_SECTION_MISCELLANEOUS - DRI_CONF_ALLOW_RGB10_CONFIGS("true") - DRI_CONF_SECTION_END DRI_CONF_END; /*/ /** \name Screen handling functions */ /*/ /*@{*/ static void setupLoaderExtensions(__DRIscreen *psp, const __DRIextension **extensions) -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/22] dri/common: Add option to allow exposure of 10 bpc color configs. (v2)
The driver options are the defaults. drirc overrides the defaults. ~/.drirc overrides both the defaults and drirc. Environment variables override everything. Marek On Mon, Jan 8, 2018 at 9:37 AM, Tapani Pälli wrote: > > > On 01/08/2018 01:55 AM, Steven Newbury wrote: >> >> On Fri, 2017-12-15 at 23:04 +0100, Mario Kleiner wrote: >>> >>> Some clients may not like RGB10X2 and RGB10A2 fbconfigs and >>> visuals. Add a new driconf option 'allow_rgb10_configs' to >>> allow per application enable/disable. >>> >>> The option defaults to enabled. >>> >>> v2: Rename expose_rgb10_configs to allow_rgb10_configs, >>> as suggested by Emil. Add comment to option parsing, >>> to make sure it stays before the ->InitScreen(). >>> >>> Signed-off-by: Mario Kleiner >>> Reviewed-by: Tapani Pälli >>> Reviewed-by: Marek Olšák >>> --- >>> src/mesa/drivers/dri/common/dri_util.c | 12 >>> src/util/xmlpool/t_options.h | 5 + >>> 2 files changed, 13 insertions(+), 4 deletions(-) >>> >>> diff --git a/src/mesa/drivers/dri/common/dri_util.c >>> b/src/mesa/drivers/dri/common/dri_util.c >>> index d504751..d4fba0b 100644 >>> --- a/src/mesa/drivers/dri/common/dri_util.c >>> +++ b/src/mesa/drivers/dri/common/dri_util.c >>> @@ -55,6 +55,10 @@ const char __dri2ConfigOptions[] = >>> DRI_CONF_SECTION_PERFORMANCE >>>DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_1) >>> DRI_CONF_SECTION_END >>> + >>> + DRI_CONF_SECTION_MISCELLANEOUS >>> + DRI_CONF_ALLOW_RGB10_CONFIGS("true") >>> + DRI_CONF_SECTION_END >>> DRI_CONF_END; >>> >> >> This isn't exposing the driconf option for me with IVB HD4000. Adding >> the option to the same section in >> src/mesa/drivers/dri/i965/intel_screen.c did work though. >> >> Mind you having this default as true (which it does whether or not the >> option is available) is really bad here. On my LVDS display (which >> presumably isn't supporting 10bpc even if the chipset does), means by >> default my display colours are completely corrupted.
>> >> What's more, the driconf option doesn't address Wayland compositors, >> for example GDM with Wayland doesn't respect the option but uses, I >> guess, a default config which happens to be 10bpc. >> > > I thought I knew how drirc works ... but seems like this is not the case. I > thought driver could override this option by introducing same section in > its own drirc but that does not seem to work :/ I think we should set this > globally false for now (until compositor issues are resolved) and also fix > the option to work in a way that individual driver can set this value. > > FYI I filed a bug about this here: > https://bugs.freedesktop.org/show_bug.cgi?id=104536 > > // Tapani > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 1/6] spirv: Add a mechanism for dumping failing shaders
On Mon, Jan 8, 2018 at 8:40 AM, Lionel Landwerlin < lionel.g.landwer...@intel.com> wrote: > Hmm... The email subject says it's the v3. > Am I missing something? (it's at the top of the list in my client, so it's > probably the last you sent). > Nope, it's my e-mail client that's failing me... Sorry for the noise. --Jason > > On 08/01/18 16:38, Jason Ekstrand wrote: > > There's a v3 of this > > On Mon, Jan 8, 2018 at 8:36 AM, Lionel Landwerlin < > lionel.g.landwer...@intel.com> wrote: > >> This series is : >> >> Reviewed-by: Lionel Landwerlin>> >> >> On 04/01/18 01:34, Jason Ekstrand wrote: >> >>> --- >>> src/compiler/spirv/spirv_to_nir.c | 28 >>> src/compiler/spirv/vtn_private.h | 1 + >>> 2 files changed, 29 insertions(+) >>> >>> diff --git a/src/compiler/spirv/spirv_to_nir.c >>> b/src/compiler/spirv/spirv_to_nir.c >>> index dcff56f..66ea267 100644 >>> --- a/src/compiler/spirv/spirv_to_nir.c >>> +++ b/src/compiler/spirv/spirv_to_nir.c >>> @@ -31,6 +31,8 @@ >>> #include "nir/nir_constant_expressions.h" >>> #include "spirv_info.h" >>> +#include >>> + >>> void >>> vtn_log(struct vtn_builder *b, enum nir_spirv_debug_level level, >>> size_t spirv_offset, const char *message) >>> @@ -94,6 +96,27 @@ vtn_log_err(struct vtn_builder *b, >>> ralloc_free(msg); >>> } >>> +static void >>> +vtn_dump_shader(struct vtn_builder *b, const char *path, const char >>> *prefix) >>> +{ >>> + static int idx = 0; >>> + >>> + char filename[1024]; >>> + int len = snprintf(filename, sizeof(filename), "%s/%s-%d.spirv", >>> + path, prefix, idx++); >>> + if (len < 0 || len >= sizeof(filename)) >>> + return; >>> + >>> + FILE *f = fopen(filename, "w"); >>> + if (f == NULL) >>> + return; >>> + >>> + fwrite(b->spirv, sizeof(*b->spirv), b->spirv_word_count, f); >>> + fclose(f); >>> + >>> + vtn_info("SPIR-V shader dumped to %s", filename); >>> +} >>> + >>> void >>> _vtn_warn(struct vtn_builder *b, const char *file, unsigned line, >>> const char *fmt, ...) 
>>> @@ -117,6 +140,10 @@ _vtn_fail(struct vtn_builder *b, const char *file, >>> unsigned line, >>> file, line, fmt, args); >>> va_end(args); >>> + const char *dump_path = getenv("MESA_SPIRV_FAIL_DUMP_PATH"); >>> + if (dump_path) >>> + vtn_dump_shader(b, dump_path, "fail"); >>> + >>> longjmp(b->fail_jump, 1); >>> } >>> @@ -3690,6 +3717,7 @@ spirv_to_nir(const uint32_t *words, size_t >>> word_count, >>> /* Initialize the stn_builder object */ >>> struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); >>> b->spirv = words; >>> + b->spirv_word_count = word_count; >>> b->file = NULL; >>> b->line = -1; >>> b->col = -1; >>> diff --git a/src/compiler/spirv/vtn_private.h >>> b/src/compiler/spirv/vtn_private.h >>> index f7d8f49..374643a 100644 >>> --- a/src/compiler/spirv/vtn_private.h >>> +++ b/src/compiler/spirv/vtn_private.h >>> @@ -531,6 +531,7 @@ struct vtn_builder { >>> jmp_buf fail_jump; >>>const uint32_t *spirv; >>> + size_t spirv_word_count; >>>nir_shader *shader; >>> const struct spirv_to_nir_options *options; >>> >> >> >> > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 1/6] spirv: Add a mechanism for dumping failing shaders
https://patchwork.freedesktop.org/series/35979/ On Mon, Jan 8, 2018 at 8:40 AM, Lionel Landwerlin < lionel.g.landwer...@intel.com> wrote: > Hmm... The email subject says it's the v3. > Am I missing something? (it's at the top of the list in my client, so it's > probably the last you sent). > > > On 08/01/18 16:38, Jason Ekstrand wrote: > > There's a v3 of this > > On Mon, Jan 8, 2018 at 8:36 AM, Lionel Landwerlin < > lionel.g.landwer...@intel.com> wrote: > >> This series is : >> >> Reviewed-by: Lionel Landwerlin>> >> >> On 04/01/18 01:34, Jason Ekstrand wrote: >> >>> --- >>> src/compiler/spirv/spirv_to_nir.c | 28 >>> src/compiler/spirv/vtn_private.h | 1 + >>> 2 files changed, 29 insertions(+) >>> >>> diff --git a/src/compiler/spirv/spirv_to_nir.c >>> b/src/compiler/spirv/spirv_to_nir.c >>> index dcff56f..66ea267 100644 >>> --- a/src/compiler/spirv/spirv_to_nir.c >>> +++ b/src/compiler/spirv/spirv_to_nir.c >>> @@ -31,6 +31,8 @@ >>> #include "nir/nir_constant_expressions.h" >>> #include "spirv_info.h" >>> +#include >>> + >>> void >>> vtn_log(struct vtn_builder *b, enum nir_spirv_debug_level level, >>> size_t spirv_offset, const char *message) >>> @@ -94,6 +96,27 @@ vtn_log_err(struct vtn_builder *b, >>> ralloc_free(msg); >>> } >>> +static void >>> +vtn_dump_shader(struct vtn_builder *b, const char *path, const char >>> *prefix) >>> +{ >>> + static int idx = 0; >>> + >>> + char filename[1024]; >>> + int len = snprintf(filename, sizeof(filename), "%s/%s-%d.spirv", >>> + path, prefix, idx++); >>> + if (len < 0 || len >= sizeof(filename)) >>> + return; >>> + >>> + FILE *f = fopen(filename, "w"); >>> + if (f == NULL) >>> + return; >>> + >>> + fwrite(b->spirv, sizeof(*b->spirv), b->spirv_word_count, f); >>> + fclose(f); >>> + >>> + vtn_info("SPIR-V shader dumped to %s", filename); >>> +} >>> + >>> void >>> _vtn_warn(struct vtn_builder *b, const char *file, unsigned line, >>> const char *fmt, ...) 
>>> @@ -117,6 +140,10 @@ _vtn_fail(struct vtn_builder *b, const char *file, >>> unsigned line, >>> file, line, fmt, args); >>> va_end(args); >>> + const char *dump_path = getenv("MESA_SPIRV_FAIL_DUMP_PATH"); >>> + if (dump_path) >>> + vtn_dump_shader(b, dump_path, "fail"); >>> + >>> longjmp(b->fail_jump, 1); >>> } >>> @@ -3690,6 +3717,7 @@ spirv_to_nir(const uint32_t *words, size_t >>> word_count, >>> /* Initialize the stn_builder object */ >>> struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); >>> b->spirv = words; >>> + b->spirv_word_count = word_count; >>> b->file = NULL; >>> b->line = -1; >>> b->col = -1; >>> diff --git a/src/compiler/spirv/vtn_private.h >>> b/src/compiler/spirv/vtn_private.h >>> index f7d8f49..374643a 100644 >>> --- a/src/compiler/spirv/vtn_private.h >>> +++ b/src/compiler/spirv/vtn_private.h >>> @@ -531,6 +531,7 @@ struct vtn_builder { >>> jmp_buf fail_jump; >>>const uint32_t *spirv; >>> + size_t spirv_word_count; >>>nir_shader *shader; >>> const struct spirv_to_nir_options *options; >>> >> >> >> > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] link mesautil with pthreads
On Mon, 2018-01-01 at 22:49 +0100, Igor Gnatenko wrote: > ../../src/util/.libs/libmesautil.a(libmesautil_la-u_queue.o): In function > `u_thread_setname': > /builddir/build/BUILD/mesa-17.3.1/src/util/../../src/util/u_thread.h:66: > undefined reference to `pthread_setname_np' > ../../src/util/.libs/libmesautil.a(libmesautil_la-u_queue.o): In function > `thrd_join': > /builddir/build/BUILD/mesa-17.3.1/src/util/../../include/c11/threads_posix.h:336: > undefined reference to `pthread_join' > ../../src/util/.libs/libmesautil.a(libmesautil_la-u_queue.o): In function > `u_thread_create': > /builddir/build/BUILD/mesa-17.3.1/src/util/../../src/util/u_thread.h:48: > undefined reference to `pthread_sigmask' > ../../src/util/.libs/libmesautil.a(libmesautil_la-u_queue.o): In function > `thrd_create': > /builddir/build/BUILD/mesa-17.3.1/src/util/../../include/c11/threads_posix.h:296: > undefined reference to `pthread_create' > ../../src/util/.libs/libmesautil.a(libmesautil_la-u_queue.o): In function > `u_thread_create': > /builddir/build/BUILD/mesa-17.3.1/src/util/../../src/util/u_thread.h:50: > undefined reference to `pthread_sigmask' > /builddir/build/BUILD/mesa-17.3.1/src/util/../../src/util/u_thread.h:50: > undefined reference to `pthread_sigmask' > ../../src/util/.libs/libmesautil.a(libmesautil_la-u_queue.o): In function > `call_once': > /builddir/build/BUILD/mesa-17.3.1/src/util/../../include/c11/threads_posix.h:96: > undefined reference to `pthread_once' > ../../src/util/.libs/libmesautil.a(libmesautil_la-u_queue.o): In function > `u_thread_get_time_nano': > /builddir/build/BUILD/mesa-17.3.1/src/util/../../src/util/u_thread.h:84: > undefined reference to `pthread_getcpuclockid' > collect2: error: ld returned 1 exit status > > Signed-off-by: Igor Gnatenko Yep, just hit this in rawhide myself. Merged, thanks. - ajax ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 1/6] spirv: Add a mechanism for dumping failing shaders
Hmm... The email subject says it's the v3. Am I missing something? (it's at the top of the list in my client, so it's probably the last you sent). On 08/01/18 16:38, Jason Ekstrand wrote: There's a v3 of this On Mon, Jan 8, 2018 at 8:36 AM, Lionel Landwerlin> wrote: This series is : Reviewed-by: Lionel Landwerlin > On 04/01/18 01:34, Jason Ekstrand wrote: --- src/compiler/spirv/spirv_to_nir.c | 28 src/compiler/spirv/vtn_private.h | 1 + 2 files changed, 29 insertions(+) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index dcff56f..66ea267 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -31,6 +31,8 @@ #include "nir/nir_constant_expressions.h" #include "spirv_info.h" +#include + void vtn_log(struct vtn_builder *b, enum nir_spirv_debug_level level, size_t spirv_offset, const char *message) @@ -94,6 +96,27 @@ vtn_log_err(struct vtn_builder *b, ralloc_free(msg); } +static void +vtn_dump_shader(struct vtn_builder *b, const char *path, const char *prefix) +{ + static int idx = 0; + + char filename[1024]; + int len = snprintf(filename, sizeof(filename), "%s/%s-%d.spirv", + path, prefix, idx++); + if (len < 0 || len >= sizeof(filename)) + return; + + FILE *f = fopen(filename, "w"); + if (f == NULL) + return; + + fwrite(b->spirv, sizeof(*b->spirv), b->spirv_word_count, f); + fclose(f); + + vtn_info("SPIR-V shader dumped to %s", filename); +} + void _vtn_warn(struct vtn_builder *b, const char *file, unsigned line, const char *fmt, ...) 
@@ -117,6 +140,10 @@ _vtn_fail(struct vtn_builder *b, const char *file, unsigned line, file, line, fmt, args); va_end(args); + const char *dump_path = getenv("MESA_SPIRV_FAIL_DUMP_PATH"); + if (dump_path) + vtn_dump_shader(b, dump_path, "fail"); + longjmp(b->fail_jump, 1); } @@ -3690,6 +3717,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, /* Initialize the stn_builder object */ struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); b->spirv = words; + b->spirv_word_count = word_count; b->file = NULL; b->line = -1; b->col = -1; diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h index f7d8f49..374643a 100644 --- a/src/compiler/spirv/vtn_private.h +++ b/src/compiler/spirv/vtn_private.h @@ -531,6 +531,7 @@ struct vtn_builder { jmp_buf fail_jump; const uint32_t *spirv; + size_t spirv_word_count; nir_shader *shader; const struct spirv_to_nir_options *options; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 1/6] spirv: Add a mechanism for dumping failing shaders
There's a v3 of this On Mon, Jan 8, 2018 at 8:36 AM, Lionel Landwerlin < lionel.g.landwer...@intel.com> wrote: > This series is : > > Reviewed-by: Lionel Landwerlin> > > On 04/01/18 01:34, Jason Ekstrand wrote: > >> --- >> src/compiler/spirv/spirv_to_nir.c | 28 >> src/compiler/spirv/vtn_private.h | 1 + >> 2 files changed, 29 insertions(+) >> >> diff --git a/src/compiler/spirv/spirv_to_nir.c >> b/src/compiler/spirv/spirv_to_nir.c >> index dcff56f..66ea267 100644 >> --- a/src/compiler/spirv/spirv_to_nir.c >> +++ b/src/compiler/spirv/spirv_to_nir.c >> @@ -31,6 +31,8 @@ >> #include "nir/nir_constant_expressions.h" >> #include "spirv_info.h" >> +#include >> + >> void >> vtn_log(struct vtn_builder *b, enum nir_spirv_debug_level level, >> size_t spirv_offset, const char *message) >> @@ -94,6 +96,27 @@ vtn_log_err(struct vtn_builder *b, >> ralloc_free(msg); >> } >> +static void >> +vtn_dump_shader(struct vtn_builder *b, const char *path, const char >> *prefix) >> +{ >> + static int idx = 0; >> + >> + char filename[1024]; >> + int len = snprintf(filename, sizeof(filename), "%s/%s-%d.spirv", >> + path, prefix, idx++); >> + if (len < 0 || len >= sizeof(filename)) >> + return; >> + >> + FILE *f = fopen(filename, "w"); >> + if (f == NULL) >> + return; >> + >> + fwrite(b->spirv, sizeof(*b->spirv), b->spirv_word_count, f); >> + fclose(f); >> + >> + vtn_info("SPIR-V shader dumped to %s", filename); >> +} >> + >> void >> _vtn_warn(struct vtn_builder *b, const char *file, unsigned line, >> const char *fmt, ...) 
>> @@ -117,6 +140,10 @@ _vtn_fail(struct vtn_builder *b, const char *file, >> unsigned line, >> file, line, fmt, args); >> va_end(args); >> + const char *dump_path = getenv("MESA_SPIRV_FAIL_DUMP_PATH"); >> + if (dump_path) >> + vtn_dump_shader(b, dump_path, "fail"); >> + >> longjmp(b->fail_jump, 1); >> } >> @@ -3690,6 +3717,7 @@ spirv_to_nir(const uint32_t *words, size_t >> word_count, >> /* Initialize the stn_builder object */ >> struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); >> b->spirv = words; >> + b->spirv_word_count = word_count; >> b->file = NULL; >> b->line = -1; >> b->col = -1; >> diff --git a/src/compiler/spirv/vtn_private.h >> b/src/compiler/spirv/vtn_private.h >> index f7d8f49..374643a 100644 >> --- a/src/compiler/spirv/vtn_private.h >> +++ b/src/compiler/spirv/vtn_private.h >> @@ -531,6 +531,7 @@ struct vtn_builder { >> jmp_buf fail_jump; >>const uint32_t *spirv; >> + size_t spirv_word_count; >>nir_shader *shader; >> const struct spirv_to_nir_options *options; >> > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 1/6] spirv: Add a mechanism for dumping failing shaders
This series is : Reviewed-by: Lionel LandwerlinOn 04/01/18 01:34, Jason Ekstrand wrote: --- src/compiler/spirv/spirv_to_nir.c | 28 src/compiler/spirv/vtn_private.h | 1 + 2 files changed, 29 insertions(+) diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index dcff56f..66ea267 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -31,6 +31,8 @@ #include "nir/nir_constant_expressions.h" #include "spirv_info.h" +#include + void vtn_log(struct vtn_builder *b, enum nir_spirv_debug_level level, size_t spirv_offset, const char *message) @@ -94,6 +96,27 @@ vtn_log_err(struct vtn_builder *b, ralloc_free(msg); } +static void +vtn_dump_shader(struct vtn_builder *b, const char *path, const char *prefix) +{ + static int idx = 0; + + char filename[1024]; + int len = snprintf(filename, sizeof(filename), "%s/%s-%d.spirv", + path, prefix, idx++); + if (len < 0 || len >= sizeof(filename)) + return; + + FILE *f = fopen(filename, "w"); + if (f == NULL) + return; + + fwrite(b->spirv, sizeof(*b->spirv), b->spirv_word_count, f); + fclose(f); + + vtn_info("SPIR-V shader dumped to %s", filename); +} + void _vtn_warn(struct vtn_builder *b, const char *file, unsigned line, const char *fmt, ...) 
@@ -117,6 +140,10 @@ _vtn_fail(struct vtn_builder *b, const char *file, unsigned line, file, line, fmt, args); va_end(args); + const char *dump_path = getenv("MESA_SPIRV_FAIL_DUMP_PATH"); + if (dump_path) + vtn_dump_shader(b, dump_path, "fail"); + longjmp(b->fail_jump, 1); } @@ -3690,6 +3717,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count, /* Initialize the stn_builder object */ struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); b->spirv = words; + b->spirv_word_count = word_count; b->file = NULL; b->line = -1; b->col = -1; diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h index f7d8f49..374643a 100644 --- a/src/compiler/spirv/vtn_private.h +++ b/src/compiler/spirv/vtn_private.h @@ -531,6 +531,7 @@ struct vtn_builder { jmp_buf fail_jump; const uint32_t *spirv; + size_t spirv_word_count; nir_shader *shader; const struct spirv_to_nir_options *options; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] glsl: remove LowerTCSPatchVerticesIn
This looks good to me. However, I think we'll need to do the same thing for LowerTESPatchVerticesIn and they can probably be handled by the same pass. On Mon, Jan 8, 2018 at 5:13 AM, Iago Toral Quirogawrote: > Intel was the only user, now it does this in NIR. > --- > src/compiler/glsl/builtin_variables.cpp | 13 + > src/mesa/drivers/dri/i965/brw_context.c | 1 - > src/mesa/main/mtypes.h | 1 - > 3 files changed, 1 insertion(+), 14 deletions(-) > > diff --git a/src/compiler/glsl/builtin_variables.cpp > b/src/compiler/glsl/builtin_variables.cpp > index a885f32875..a686cb6a45 100644 > --- a/src/compiler/glsl/builtin_variables.cpp > +++ b/src/compiler/glsl/builtin_variables.cpp > @@ -38,11 +38,6 @@ static const struct gl_builtin_uniform_element > gl_NumSamples_elements[] = { > {NULL, {STATE_NUM_SAMPLES, 0, 0}, SWIZZLE_} > }; > > -/* only for TCS */ > -static const struct gl_builtin_uniform_element > gl_PatchVerticesIn_elements[] = { > - {NULL, {STATE_INTERNAL, STATE_TCS_PATCH_VERTICES_IN}, SWIZZLE_} > -}; > - > static const struct gl_builtin_uniform_element gl_DepthRange_elements[] = > { > {"near", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_}, > {"far", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_}, > @@ -240,7 +235,6 @@ static const struct gl_builtin_uniform_element > gl_NormalMatrix_elements[] = { > #define STATEVAR(name) {#name, name ## _elements, ARRAY_SIZE(name ## > _elements)} > > static const struct gl_builtin_uniform_desc _mesa_builtin_uniform_desc[] > = { > - STATEVAR(gl_PatchVerticesIn), > STATEVAR(gl_NumSamples), > STATEVAR(gl_DepthRange), > STATEVAR(gl_ClipPlane), > @@ -1067,12 +1061,7 @@ builtin_variable_generator:: > generate_tcs_special_vars() > { > add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); > add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, > "gl_InvocationID"); > - > - if (state->ctx->Const.LowerTCSPatchVerticesIn) { > - add_uniform(int_t, "gl_PatchVerticesIn"); > - } else { > - add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, > 
"gl_PatchVerticesIn"); > - } > + add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, > "gl_PatchVerticesIn"); > > add_output(VARYING_SLOT_TESS_LEVEL_OUTER, array(float_t, 4), >"gl_TessLevelOuter")->data.patch = 1; > diff --git a/src/mesa/drivers/dri/i965/brw_context.c > b/src/mesa/drivers/dri/i965/brw_context.c > index b555f7bfdf..e44c2b1f34 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.c > +++ b/src/mesa/drivers/dri/i965/brw_context.c > @@ -537,7 +537,6 @@ brw_initialize_context_constants(struct brw_context > *brw) >ctx->Const.MaxClipPlanes = 8; > > ctx->Const.GLSLTessLevelsAsInputs = true; > - ctx->Const.LowerTCSPatchVerticesIn = devinfo->gen >= 8; > ctx->Const.LowerTESPatchVerticesIn = true; > ctx->Const.PrimitiveRestartForPatches = true; > > diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h > index a4f1e78f41..fce456b2b4 100644 > --- a/src/mesa/main/mtypes.h > +++ b/src/mesa/main/mtypes.h > @@ -4020,7 +4020,6 @@ struct gl_constants > GLuint MaxTessPatchComponents; > GLuint MaxTessControlTotalOutputComponents; > bool LowerTessLevel; /**< Lower gl_TessLevel* from float[n] to vecn? */ > - bool LowerTCSPatchVerticesIn; /**< Lower gl_PatchVerticesIn to a > uniform */ > bool LowerTESPatchVerticesIn; /**< Lower gl_PatchVerticesIn to a > uniform */ > bool PrimitiveRestartForPatches; > bool LowerCsDerivedVariables;/**< Lower gl_GlobalInvocationID and > -- > 2.11.0 > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] i965/nir: add a helper to lower TCS gl_PatchVerticesIn to a uniform
On Mon, Jan 8, 2018 at 5:13 AM, Iago Toral Quirogawrote: > --- > src/intel/compiler/brw_nir.h | 2 ++ > src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 29 > ++ > 2 files changed, 31 insertions(+) > > diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h > index 809d4c338d..a29d858806 100644 > --- a/src/intel/compiler/brw_nir.h > +++ b/src/intel/compiler/brw_nir.h > @@ -145,6 +145,8 @@ void brw_nir_setup_arb_uniforms(void *mem_ctx, > nir_shader *shader, > struct gl_program *prog, > struct brw_stage_prog_data > *stage_prog_data); > > +void brw_nir_lower_tcs_vertices_in_to_uniform(nir_shader *nir); > + > void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler, > nir_shader *nir, > struct brw_ubo_range out_ranges[4]); > diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > index 9e135cbb1a..38498833a7 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp > @@ -243,3 +243,32 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader > *shader, > stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO; > } > } > + > +void > +brw_nir_lower_tcs_vertices_in_to_uniform(nir_shader *nir) > +{ > + assert(nir->info.stage == MESA_SHADER_TESS_CTRL); > + > + int uniform = -1; > In this new version, uniform is always -1 so we should drop it. 
> + nir_foreach_variable_safe(var, >system_values) { > + if (var->data.location != SYSTEM_VALUE_VERTICES_IN) > + continue; > + > + if (uniform == -1) { > + gl_state_index tokens[STATE_LENGTH] = { > +STATE_INTERNAL, > +STATE_TCS_PATCH_VERTICES_IN, > + }; > + var->num_state_slots = 1; > + var->state_slots = > +ralloc_array(var, nir_state_slot, var->num_state_slots); > + memcpy(var->state_slots[0].tokens, tokens, sizeof(tokens)); > + var->state_slots[0].swizzle = SWIZZLE_; > + } > + > + var->data.mode = nir_var_uniform; > + var->data.location = uniform; > + exec_node_remove(>node); > + exec_list_push_tail(>uniforms, >node); > + } > +} > -- > 2.11.0 > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] ac: add load_tess_level() to the abi
For the series: Reviewed-by: Marek OlšákMarek On Mon, Jan 8, 2018 at 6:07 AM, Timothy Arceri wrote: > Fixes the following piglit tests in radeonsi: > > vs-tcs-tes-tessinner-tessouter-inputs-quads.shader_test > vs-tcs-tes-tessinner-tessouter-inputs-tris.shader_test > vs-tes-tessinner-tessouter-inputs-quads.shader_test > vs-tes-tessinner-tessouter-inputs-tris.shader_test > > v2: make use of si_shader_io_get_unique_index_patch() > via the helper in the previous patch rather than > shader_io_get_unique_index() > > Reviewed-by: Nicolai Hähnle (v1) > --- > src/amd/common/ac_nir_to_llvm.c | 6 ++ > src/amd/common/ac_shader_abi.h | 4 > src/gallium/drivers/radeonsi/si_shader.c | 22 ++ > 3 files changed, 32 insertions(+) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index 48e2920a15..5203b78537 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -4364,6 +4364,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx, > result = ctx->abi->load_tess_coord(ctx->abi, type, > instr->num_components); > break; > } > + case nir_intrinsic_load_tess_level_outer: > + result = ctx->abi->load_tess_level(ctx->abi, > VARYING_SLOT_TESS_LEVEL_OUTER); > + break; > + case nir_intrinsic_load_tess_level_inner: > + result = ctx->abi->load_tess_level(ctx->abi, > VARYING_SLOT_TESS_LEVEL_INNER); > + break; > case nir_intrinsic_load_patch_vertices_in: > result = LLVMConstInt(ctx->ac.i32, > ctx->nctx->options->key.tcs.input_vertices, false); > break; > diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h > index 277e4efe47..e3a47089a5 100644 > --- a/src/amd/common/ac_shader_abi.h > +++ b/src/amd/common/ac_shader_abi.h > @@ -103,6 +103,10 @@ struct ac_shader_abi { > LLVMTypeRef type, > unsigned num_components); > > + LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi, > + unsigned varying_id); > + > + > LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef > index); > > /** > diff 
--git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index e579916359..86f3f7a8ba 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -1934,6 +1934,27 @@ static LLVMValueRef load_tess_level(struct > si_shader_context *ctx, > > } > > +static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi, > + unsigned varying_id) > +{ > + struct si_shader_context *ctx = si_shader_context_from_abi(abi); > + unsigned semantic_name; > + > + switch (varying_id) { > + case VARYING_SLOT_TESS_LEVEL_INNER: > + semantic_name = TGSI_SEMANTIC_TESSINNER; > + break; > + case VARYING_SLOT_TESS_LEVEL_OUTER: > + semantic_name = TGSI_SEMANTIC_TESSOUTER; > + break; > + default: > + unreachable("unknown tess level"); > + } > + > + return load_tess_level(ctx, semantic_name); > + > +} > + > void si_load_system_value(struct si_shader_context *ctx, > unsigned index, > const struct tgsi_full_declaration *decl) > @@ -5971,6 +5992,7 @@ static bool si_compile_tgsi_main(struct > si_shader_context *ctx, > bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes; > ctx->abi.load_tess_inputs = si_nir_load_input_tes; > ctx->abi.load_tess_coord = si_load_tess_coord; > + ctx->abi.load_tess_level = si_load_tess_level; > if (shader->key.as_es) > ctx->abi.emit_outputs = si_llvm_emit_es_epilogue; > else > -- > 2.14.3 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa/st: translate SO info in glsl_to_nir() case
From: Rob ClarkThis was handled for VS, but not for GS. Fixes for gallium drivers using nir: spec@arb_gpu_shader5@arb_gpu_shader5-xfb-streams-without-invocations spec@arb_gpu_shader5@arb_gpu_shader5-xfb-streams* spec@arb_transform_feedback3@arb_transform_feedback3-ext_interleaved_two_bufs_gs* spec@ext_transform_feedback@geometry-shaders-basic spec@ext_transform_feedback@* use_gs spec@glsl-1.50@execution@geometry@primitive-id* spec@glsl-1.50@execution@geometry@tri-strip-ordering-with-prim-restart gl_triangle_strip * spec@glsl-1.50@transform-feedback-builtins spec@glsl-1.50@transform-feedback-type-and-size Signed-off-by: Rob Clark Tested-by: Karol Herbst Cc: Rob Clark Cc: Timothy Arceri --- src/mesa/state_tracker/st_program.c | 61 ++--- 1 file changed, 56 insertions(+), 5 deletions(-) diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 05e6042f42..5efd83d5a8 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -1421,6 +1421,50 @@ st_translate_program_common(struct st_context *st, } } +/** + * Update stream-output info for GS/TCS/TES. Normally this is done in + * st_translate_program_common() but that is not called for glsl_to_nir + * case. + */ +static void +st_translate_program_stream_output(struct gl_program *prog, + struct pipe_stream_output_info *stream_output) +{ + if (!prog->sh.LinkedTransformFeedback) + return; + + ubyte outputMapping[VARYING_SLOT_TESS_MAX]; + GLuint attr; + uint num_outputs = 0; + + memset(outputMapping, 0, sizeof(outputMapping)); + + /* +* Determine number of outputs, the (default) output register +* mapping and the semantic information for each output. +*/ + for (attr = 0; attr < VARYING_SLOT_MAX; attr++) { + if (prog->info.outputs_written & BITFIELD64_BIT(attr)) { + GLuint slot = num_outputs++; + + outputMapping[attr] = slot; + } + } + + /* Also add patch outputs. 
*/ + for (attr = 0; attr < 32; attr++) { + if (prog->info.patch_outputs_written & (1u << attr)) { + GLuint slot = num_outputs++; + GLuint patch_attr = VARYING_SLOT_PATCH0 + attr; + + outputMapping[patch_attr] = slot; + } + } + + st_translate_stream_output_info2(prog->sh.LinkedTransformFeedback, +outputMapping, +stream_output); +} /** * Translate a geometry program to create a new variant. @@ -1432,8 +1476,10 @@ st_translate_geometry_program(struct st_context *st, struct ureg_program *ureg; /* We have already compiled to NIR so just return */ - if (stgp->shader_program) + if (stgp->shader_program) { + st_translate_program_stream_output(>Base, >tgsi.stream_output); return true; + } ureg = ureg_create_with_screen(PIPE_SHADER_GEOMETRY, st->pipe->screen); if (ureg == NULL) @@ -1489,6 +1535,7 @@ st_get_basic_variant(struct st_context *st, tgsi.ir.nir = nir_shader_clone(NULL, prog->tgsi.ir.nir); st_finalize_nir(st, >Base, prog->shader_program, tgsi.ir.nir); +tgsi.stream_output = prog->tgsi.stream_output; } else tgsi = prog->tgsi; /* fill in new variant */ @@ -1529,9 +1576,11 @@ st_translate_tessctrl_program(struct st_context *st, { struct ureg_program *ureg; - /* We have already compiler to NIR so just return */ - if (sttcp->shader_program) + /* We have already compiled to NIR so just return */ + if (sttcp->shader_program) { + st_translate_program_stream_output(>Base, >tgsi.stream_output); return true; + } ureg = ureg_create_with_screen(PIPE_SHADER_TESS_CTRL, st->pipe->screen); if (ureg == NULL) @@ -1558,9 +1607,11 @@ st_translate_tesseval_program(struct st_context *st, { struct ureg_program *ureg; - /* We have already compiler to NIR so just return */ - if (sttep->shader_program) + /* We have already compiled to NIR so just return */ + if (sttep->shader_program) { + st_translate_program_stream_output(>Base, >tgsi.stream_output); return true; + } ureg = ureg_create_with_screen(PIPE_SHADER_TESS_EVAL, st->pipe->screen); if (ureg == NULL) -- 2.14.3 ___ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] amd/common: bump the number of available user SGPRS to 32 on GFX9
Signed-off-by: Samuel Pitoiset --- src/amd/common/ac_nir_to_llvm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 48e2920a15..9864f9366d 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -600,7 +600,9 @@ static void allocate_user_sgprs(struct nir_to_llvm_context *ctx, if (ctx->shader_info->info.needs_push_constants) user_sgpr_info->sgpr_count += 2; - uint32_t remaining_sgprs = 16 - user_sgpr_info->sgpr_count; + uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16; + uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count; + if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) { user_sgpr_info->sgpr_count += 2; user_sgpr_info->indirect_all_descriptor_sets = true; -- 2.15.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl/linker: link-error using the same name in unnamed block and outside
Please, could someone take a look at this patch? Thanks in advance. J.A. On Mon, 2017-12-04 at 17:35 +0100, Juan A. Suarez Romero wrote: > According with OpenGL GLSL 4.20 spec, section 4.3.9, page 57: > >"It is a link-time error if any particular shader interface > contains: > - two different blocks, each having no instance name, and each > having a member of the same name, or > - a variable outside a block, and a block with no instance name, > where the variable has the same name as a member in the block." > > This means that it is a link error if for example we have a vertex > shader with the following definition. > > "layout(location=0) uniform Data { float a; float b; };" > > and a fragment shader with: > > "uniform float a;" > > As in both cases we refer to both uniforms as "a", and thus using > glGetUniformLocation() wouldn't know which one we mean. > --- > src/compiler/glsl/linker.cpp | 23 +++ > 1 file changed, 23 insertions(+) > > diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp > index 33fd76deae9..b6de7b54ae3 100644 > --- a/src/compiler/glsl/linker.cpp > +++ b/src/compiler/glsl/linker.cpp > @@ -,6 +,29 @@ cross_validate_globals(struct gl_shader_program *prog, > return; > } > > + /* In OpenGL GLSL 4.20 spec, section 4.3.9, page 57: > + * > + * "It is a link-time error if any particular shader interface > + *contains: > + * > + *- two different blocks, each having no instance name, and each > + * having a member of the same name, or > + * > + *- a variable outside a block, and a block with no instance > name, > + * where the variable has the same name as a member in the > block." > + */ > + if (var->data.mode == existing->data.mode && > + var->get_interface_type() != existing->get_interface_type()) { > +linker_error(prog, "declarations for %s `%s` are in " > + "%s and %s\n", > + mode_string(var), var->name, > + existing->get_interface_type() ? 
> + existing->get_interface_type()->name : "outside a > block", > + var->get_interface_type() ? > + var->get_interface_type()->name : "outside a > block"); > + > +return; > + } > /* Only in GLSL ES 3.10, the precision qualifier should not match >* between block members defined in matched block names within a >* shader interface. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 1/4] anv/query: implement multiview interactions
Ignore the 1/4 in the subject, that was my mistake, this is a single patch. Iago On Mon, 2018-01-08 at 13:57 +0100, Iago Toral Quiroga wrote: > From the Vulkan spec with KHX extensions: > > "If queries are used while executing a render pass instance that > has > multiview enabled, the query uses N consecutive query indices > in the query pool (starting at query) where N is the number of > bits > set in the view mask in the subpass the query is used in. > > How the numerical results of the query are distributed among the > queries is implementation-dependent. For example, some > implementations > may write each view's results to a distinct query, while other > implementations may write the total result to the first query and > write > zero to the other queries. However, the sum of the results in all > the > queries must accurately reflect the total result of the query > summed > over all views. Applications can sum the results from all the > queries to > compute the total result." > > In our case we only really emit a single query (in the first query > index) > that stores the aggregated result for all views, but we still need to > manage > availability for all the other query indices involved, even if we > don't > actually use them. > > This is relevant when clients call vkGetQueryPoolResults and pass all > N > queries to retrieve the results. In that scenario, without this > patch, > we will never see queries other than the first being available since > we > never emit them. > > Fixes test failures in some work-in-progress CTS multiview+query > tests. 
> --- > src/intel/vulkan/genX_query.c | 36 > > 1 file changed, 36 insertions(+) > > diff --git a/src/intel/vulkan/genX_query.c > b/src/intel/vulkan/genX_query.c > index 7683d0d1e3..231c605b6b 100644 > --- a/src/intel/vulkan/genX_query.c > +++ b/src/intel/vulkan/genX_query.c > @@ -462,6 +462,24 @@ void genX(CmdEndQuery)( > default: > unreachable(""); > } > + > + /* When multiview is active the spec requires that N consecutive > query > +* indices are used, where N is the number of active views in the > subpass. > +* The spec allows that we only write the results to one of the > queries > +* but we still need to manage result availability for all the > query indices. > +* Since we only emit a single query for all active views in the > +* first index, mark the other query indices as being already > available > +* with result 0. > +*/ > + if (!cmd_buffer->state.subpass || !cmd_buffer->state.subpass- > >view_mask) > + return; > + > + uint32_t num_queries = _mesa_bitcount(cmd_buffer->state.subpass- > >view_mask); > + for (uint32_t i = 1; i < num_queries; i++) { > + uint64_t *slot = pool->bo.map + (query + i) * pool->stride; > + slot[0] = 1; > + memset([1], 0, sizeof(uint64_t) * pool->stride); > + } > } > > #define TIMESTAMP 0x2358 > @@ -504,6 +522,24 @@ void genX(CmdWriteTimestamp)( > } > > emit_query_availability(cmd_buffer, >bo, offset); > + > + /* When multiview is active the spec requires that N consecutive > query > +* indices are used, where N is the number of active views in the > subpass. > +* The spec allows that we only write the results to one of the > queries > +* but we still need to manage result availability for all the > query indices. > +* Since we only emit a single query for all active views in the > +* first index, mark the other query indices as being already > available > +* with result 0. 
> +*/ > + if (!cmd_buffer->state.subpass || !cmd_buffer->state.subpass- > >view_mask) > + return; > + > + uint32_t num_queries = _mesa_bitcount(cmd_buffer->state.subpass- > >view_mask); > + for (uint32_t i = 1; i < num_queries; i++) { > + uint64_t *slot = pool->bo.map + (query + i) * pool->stride; > + slot[0] = 1; > + memset([1], 0, sizeof(uint64_t) * pool->stride); > + } > } > > #if GEN_GEN > 7 || GEN_IS_HASWELL ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] glsl: remove LowerTCSPatchVerticesIn
Intel was the only user, now it does this in NIR. --- src/compiler/glsl/builtin_variables.cpp | 13 + src/mesa/drivers/dri/i965/brw_context.c | 1 - src/mesa/main/mtypes.h | 1 - 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/src/compiler/glsl/builtin_variables.cpp b/src/compiler/glsl/builtin_variables.cpp index a885f32875..a686cb6a45 100644 --- a/src/compiler/glsl/builtin_variables.cpp +++ b/src/compiler/glsl/builtin_variables.cpp @@ -38,11 +38,6 @@ static const struct gl_builtin_uniform_element gl_NumSamples_elements[] = { {NULL, {STATE_NUM_SAMPLES, 0, 0}, SWIZZLE_} }; -/* only for TCS */ -static const struct gl_builtin_uniform_element gl_PatchVerticesIn_elements[] = { - {NULL, {STATE_INTERNAL, STATE_TCS_PATCH_VERTICES_IN}, SWIZZLE_} -}; - static const struct gl_builtin_uniform_element gl_DepthRange_elements[] = { {"near", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_}, {"far", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_}, @@ -240,7 +235,6 @@ static const struct gl_builtin_uniform_element gl_NormalMatrix_elements[] = { #define STATEVAR(name) {#name, name ## _elements, ARRAY_SIZE(name ## _elements)} static const struct gl_builtin_uniform_desc _mesa_builtin_uniform_desc[] = { - STATEVAR(gl_PatchVerticesIn), STATEVAR(gl_NumSamples), STATEVAR(gl_DepthRange), STATEVAR(gl_ClipPlane), @@ -1067,12 +1061,7 @@ builtin_variable_generator::generate_tcs_special_vars() { add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID"); - - if (state->ctx->Const.LowerTCSPatchVerticesIn) { - add_uniform(int_t, "gl_PatchVerticesIn"); - } else { - add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn"); - } + add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn"); add_output(VARYING_SLOT_TESS_LEVEL_OUTER, array(float_t, 4), "gl_TessLevelOuter")->data.patch = 1; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 
b555f7bfdf..e44c2b1f34 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -537,7 +537,6 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MaxClipPlanes = 8; ctx->Const.GLSLTessLevelsAsInputs = true; - ctx->Const.LowerTCSPatchVerticesIn = devinfo->gen >= 8; ctx->Const.LowerTESPatchVerticesIn = true; ctx->Const.PrimitiveRestartForPatches = true; diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index a4f1e78f41..fce456b2b4 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -4020,7 +4020,6 @@ struct gl_constants GLuint MaxTessPatchComponents; GLuint MaxTessControlTotalOutputComponents; bool LowerTessLevel; /**< Lower gl_TessLevel* from float[n] to vecn? */ - bool LowerTCSPatchVerticesIn; /**< Lower gl_PatchVerticesIn to a uniform */ bool LowerTESPatchVerticesIn; /**< Lower gl_PatchVerticesIn to a uniform */ bool PrimitiveRestartForPatches; bool LowerCsDerivedVariables;/**< Lower gl_GlobalInvocationID and -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] i965/nir: add a helper to lower TCS gl_PatchVerticesIn to a uniform
--- src/intel/compiler/brw_nir.h | 2 ++ src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 29 ++ 2 files changed, 31 insertions(+) diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 809d4c338d..a29d858806 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -145,6 +145,8 @@ void brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader, struct gl_program *prog, struct brw_stage_prog_data *stage_prog_data); +void brw_nir_lower_tcs_vertices_in_to_uniform(nir_shader *nir); + void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler, nir_shader *nir, struct brw_ubo_range out_ranges[4]); diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp index 9e135cbb1a..38498833a7 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -243,3 +243,32 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader, stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO; } } + +void +brw_nir_lower_tcs_vertices_in_to_uniform(nir_shader *nir) +{ + assert(nir->info.stage == MESA_SHADER_TESS_CTRL); + + int uniform = -1; + nir_foreach_variable_safe(var, &nir->system_values) { + if (var->data.location != SYSTEM_VALUE_VERTICES_IN) + continue; + + if (uniform == -1) { + gl_state_index tokens[STATE_LENGTH] = { +STATE_INTERNAL, +STATE_TCS_PATCH_VERTICES_IN, + }; + var->num_state_slots = 1; + var->state_slots = +ralloc_array(var, nir_state_slot, var->num_state_slots); + memcpy(var->state_slots[0].tokens, tokens, sizeof(tokens)); + var->state_slots[0].swizzle = SWIZZLE_XXXX; + } + + var->data.mode = nir_var_uniform; + var->data.location = uniform; + exec_node_remove(&var->node); + exec_list_push_tail(&nir->uniforms, &var->node); + } +} -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] i965: lower TCS gl_PatchVerticesIn to uniform
We want this here instead of nir_lower_system_values because for Vulkan we don't want this lowering to take place. --- src/mesa/drivers/dri/i965/brw_program.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 5b168c25e3..84f29e9c7f 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -88,6 +88,12 @@ brw_create_nir(struct brw_context *brw, } nir_validate_shader(nir); + /* Lower TCS PatchVerticesIn from system value to uniform. This needs to +* happen before brw_preprocess_nir, since that will lower system values. +*/ + if (stage == MESA_SHADER_TESS_CTRL && brw->screen->devinfo.gen >= 8) + brw_nir_lower_tcs_vertices_in_to_uniform(nir); + nir = brw_preprocess_nir(brw->screen->compiler, nir); if (stage == MESA_SHADER_FRAGMENT) { -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 1/4] anv/query: implement multiview interactions
This version adds the same behavior for vkCmdWriteTimestamp, which has similar interactions with multiview for our case. Iago On Mon, 2018-01-08 at 13:57 +0100, Iago Toral Quiroga wrote: > From the Vulkan spec with KHX extensions: > > "If queries are used while executing a render pass instance that > has > multiview enabled, the query uses N consecutive query indices > in the query pool (starting at query) where N is the number of > bits > set in the view mask in the subpass the query is used in. > > How the numerical results of the query are distributed among the > queries is implementation-dependent. For example, some > implementations > may write each view's results to a distinct query, while other > implementations may write the total result to the first query and > write > zero to the other queries. However, the sum of the results in all > the > queries must accurately reflect the total result of the query > summed > over all views. Applications can sum the results from all the > queries to > compute the total result." > > In our case we only really emit a single query (in the first query > index) > that stores the aggregated result for all views, but we still need to > manage > availability for all the other query indices involved, even if we > don't > actually use them. > > This is relevant when clients call vkGetQueryPoolResults and pass all > N > queries to retrieve the results. In that scenario, without this > patch, > we will never see queries other than the first being available since > we > never emit them. > > Fixes test failures in some work-in-progress CTS multiview+query > tests. 
> --- > src/intel/vulkan/genX_query.c | 36 > > 1 file changed, 36 insertions(+) > > diff --git a/src/intel/vulkan/genX_query.c > b/src/intel/vulkan/genX_query.c > index 7683d0d1e3..231c605b6b 100644 > --- a/src/intel/vulkan/genX_query.c > +++ b/src/intel/vulkan/genX_query.c > @@ -462,6 +462,24 @@ void genX(CmdEndQuery)( > default: > unreachable(""); > } > + > + /* When multiview is active the spec requires that N consecutive > query > +* indices are used, where N is the number of active views in the > subpass. > +* The spec allows that we only write the results to one of the > queries > +* but we still need to manage result availability for all the > query indices. > +* Since we only emit a single query for all active views in the > +* first index, mark the other query indices as being already > available > +* with result 0. > +*/ > + if (!cmd_buffer->state.subpass || !cmd_buffer->state.subpass- > >view_mask) > + return; > + > + uint32_t num_queries = _mesa_bitcount(cmd_buffer->state.subpass- > >view_mask); > + for (uint32_t i = 1; i < num_queries; i++) { > + uint64_t *slot = pool->bo.map + (query + i) * pool->stride; > + slot[0] = 1; > + memset([1], 0, sizeof(uint64_t) * pool->stride); > + } > } > > #define TIMESTAMP 0x2358 > @@ -504,6 +522,24 @@ void genX(CmdWriteTimestamp)( > } > > emit_query_availability(cmd_buffer, >bo, offset); > + > + /* When multiview is active the spec requires that N consecutive > query > +* indices are used, where N is the number of active views in the > subpass. > +* The spec allows that we only write the results to one of the > queries > +* but we still need to manage result availability for all the > query indices. > +* Since we only emit a single query for all active views in the > +* first index, mark the other query indices as being already > available > +* with result 0. 
> +*/ > + if (!cmd_buffer->state.subpass || !cmd_buffer->state.subpass- > >view_mask) > + return; > + > + uint32_t num_queries = _mesa_bitcount(cmd_buffer->state.subpass- > >view_mask); > + for (uint32_t i = 1; i < num_queries; i++) { > + uint64_t *slot = pool->bo.map + (query + i) * pool->stride; > + slot[0] = 1; > + memset([1], 0, sizeof(uint64_t) * pool->stride); > + } > } > > #if GEN_GEN > 7 || GEN_IS_HASWELL ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 1/4] anv/query: implement multiview interactions
From the Vulkan spec with KHX extensions: "If queries are used while executing a render pass instance that has multiview enabled, the query uses N consecutive query indices in the query pool (starting at query) where N is the number of bits set in the view mask in the subpass the query is used in. How the numerical results of the query are distributed among the queries is implementation-dependent. For example, some implementations may write each view's results to a distinct query, while other implementations may write the total result to the first query and write zero to the other queries. However, the sum of the results in all the queries must accurately reflect the total result of the query summed over all views. Applications can sum the results from all the queries to compute the total result." In our case we only really emit a single query (in the first query index) that stores the aggregated result for all views, but we still need to manage availability for all the other query indices involved, even if we don't actually use them. This is relevant when clients call vkGetQueryPoolResults and pass all N queries to retrieve the results. In that scenario, without this patch, we will never see queries other than the first being available since we never emit them. Fixes test failures in some work-in-progress CTS multiview+query tests. --- src/intel/vulkan/genX_query.c | 36 1 file changed, 36 insertions(+) diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index 7683d0d1e3..231c605b6b 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -462,6 +462,24 @@ void genX(CmdEndQuery)( default: unreachable(""); } + + /* When multiview is active the spec requires that N consecutive query +* indices are used, where N is the number of active views in the subpass. +* The spec allows that we only write the results to one of the queries +* but we still need to manage result availability for all the query indices. 
+* Since we only emit a single query for all active views in the +* first index, mark the other query indices as being already available +* with result 0. +*/ + if (!cmd_buffer->state.subpass || !cmd_buffer->state.subpass->view_mask) + return; + + uint32_t num_queries = _mesa_bitcount(cmd_buffer->state.subpass->view_mask); + for (uint32_t i = 1; i < num_queries; i++) { + uint64_t *slot = pool->bo.map + (query + i) * pool->stride; + slot[0] = 1; + memset([1], 0, sizeof(uint64_t) * pool->stride); + } } #define TIMESTAMP 0x2358 @@ -504,6 +522,24 @@ void genX(CmdWriteTimestamp)( } emit_query_availability(cmd_buffer, >bo, offset); + + /* When multiview is active the spec requires that N consecutive query +* indices are used, where N is the number of active views in the subpass. +* The spec allows that we only write the results to one of the queries +* but we still need to manage result availability for all the query indices. +* Since we only emit a single query for all active views in the +* first index, mark the other query indices as being already available +* with result 0. +*/ + if (!cmd_buffer->state.subpass || !cmd_buffer->state.subpass->view_mask) + return; + + uint32_t num_queries = _mesa_bitcount(cmd_buffer->state.subpass->view_mask); + for (uint32_t i = 1; i < num_queries; i++) { + uint64_t *slot = pool->bo.map + (query + i) * pool->stride; + slot[0] = 1; + memset([1], 0, sizeof(uint64_t) * pool->stride); + } } #if GEN_GEN > 7 || GEN_IS_HASWELL -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Mesa 17.3.2 release candidate
On Thursday, 2018-01-04 14:10:52 +, Emil Velikov wrote: > Hello list, > > The release candidate for Mesa 17.3.2 is now available. Currently we have: > - 13 queued > - 18 nominated (outstanding) > - and 0 rejected patches > > > In the current queue we have: > > Multiple fixes in the RADV Vulkan driver, workaround when using slibtool and > a GLSL workaround for various titles using Unreal Engine 4. > > Take a look at section "Mesa stable queue" for more information. > > > Testing reports/general approval > > > Any testing reports (or general approval of the state of the branch) > will be greatly appreciated. > > The plan is to have 17.3.2 this Saturday (6th of January), around or > shortly after 15:00 GMT. > > If you have any questions or suggestions - be that about the current > patch queue or otherwise, please go ahead. > > > Cheers, > Emil > > > Mesa stable queue > - > > Nominated (18) > == > > Bas Nieuwenhuizen (8): > c39947ce304 radv: Invert condition for all samples identical > during resolve. > cebc9a119d4 radv: Flush caches before subpass resolve. > 73279da41da radv: Fix fragment resolve destination offset. > da192b50b24 radv: Use correct framebuffer size for partial FS resolves. > a636208ace0 radv: Always use fragment resolve if dest uses DCC. > 5158603182f radv: Use correct HTILE expanded words. > 1c78e4f0535 radv: Allow writing 0 scissors. > c99426ea831 ac/nir: Handle loading data from compact arrays. > > Dave Airlie (8): > 38e4467e99a radv: fix events on compute queues. > ec1edd0fd29 radv: fix pipeline statistics end query on compute queue > d08f2678145 radv/gfx9: fix 3d image to image transfers on compute > queues. > 09612a62e16 radv/gfx9: fix 3d image clears on compute queues > 420627e6e79 radv/gfx9: fix buffer to image for 3d images on compute > queues > 59515780433 radv/gfx9: fix block compression texture views. > 868377ab33e radv/gfx9: use a bigger hammer to flush cb/db caches. 
> a4c23ce1b68 radv/gfx9: use correct swizzle parameter to work out > border swizzle. > > Jason Ekstrand (1): > 18fde36ced4 intel/fs: Use the original destination region for > int MUL lowering > > Lucas Stach (1): > 01585659245 etnaviv: disable in-place resolve for non-supertiled > surfaces > > > Queued (13) > === > > Bas Nieuwenhuizen (1): > radv: Fix DCC compatible formats. > > Brendan King (1): > egl: link libEGL against the dynamic version of libglapi > > Dave Airlie (6): > radv/gfx9: add support for 3d images to blit 2d paths > radv: handle depth/stencil image copy with layouts better. (v3.1) > radv/meta: fix blit paths for depth/stencil (v2.1) > radv: fix issue with multisample positions and interp_var_at_sample. > radv/gfx9: add 3d sampler image->buffer copy shader. (v3) > radv: don't do format replacement on tc compat htile surfaces. > > Eric Engestrom (1): > egl: let each platform decided how to handle LIBGL_ALWAYS_SOFTWARE I broke Android in this commit, please include RobHer's aa187fe7bfac856207a5 "egl/android: Fix build break with dri2_initialize_android _EGLDisplay parameter" Cheers :) > > Samuel Pitoiset (2): > radv/gfx9: fix primitive topology when adjacency is used > radv: use a faster version for nir_op_pack_half_2x16 > > Tapani Pälli (2): > mesa: add AllowGLSLCrossStageInterpolationMismatch workaround > drirc: set allow_glsl_cross_stage_interpolation_mismatch for more games > > > Rejected (0) > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] radeonsi nir arb_vertex_attrib_64bit fixes
Thanks for the first three patches. They seem like they could make it a bit easier for me to implement this in nouveau, but I don't know when I will have the chance to look into that, because I want to fix 64-bit stuff in general first. On Mon, Jan 8, 2018 at 1:35 AM, Timothy Arceri wrote: > This series fixes all of the failing arb_vertex_attrib_64bit > piglit tests ~1000. > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 103538] vkDestroySwapchain causes deadlock with X11
https://bugs.freedesktop.org/show_bug.cgi?id=103538 Daniel Stone changed: What|Removed |Added Summary|vkDestroySwapchain causes |vkDestroySwapchain causes |deadlock on Wayland |deadlock with X11 |compositor with X11 | -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] genxml: Add missing INSTDONE_1 bits on Gen7.5+.
Reviewed-by: Lionel Landwerlin On 08/01/18 06:33, Kenneth Graunke wrote: This will make aubinator_error_decode decode them properly. --- src/intel/genxml/gen10.xml | 2 ++ src/intel/genxml/gen75.xml | 2 ++ src/intel/genxml/gen8.xml | 2 ++ src/intel/genxml/gen9.xml | 2 ++ 4 files changed, 8 insertions(+) diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml index a6b8f48fda5..47c679a3fa9 100644 --- a/src/intel/genxml/gen10.xml +++ b/src/intel/genxml/gen10.xml @@ -3637,6 +3637,8 @@ + + diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml index e2fd856197d..be537aff0ae 100644 --- a/src/intel/genxml/gen75.xml +++ b/src/intel/genxml/gen75.xml @@ -3046,6 +3046,8 @@ + + diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml index a89283ded6b..c075eecc34a 100644 --- a/src/intel/genxml/gen8.xml +++ b/src/intel/genxml/gen8.xml @@ -3300,6 +3300,8 @@ + + diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml index 1422463693d..2533ae8629f 100644 --- a/src/intel/genxml/gen9.xml +++ b/src/intel/genxml/gen9.xml @@ -3583,6 +3583,8 @@ + + ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radv/gfx9: do not load VGPR1 when GS uses points or lines
Reviewed-by: Bas NieuwenhuizenOn Fri, Jan 5, 2018 at 5:26 PM, Samuel Pitoiset wrote: > VGPR1 is only needed for topology that needs 3 offsets like > triangles or quads. > > Signed-off-by: Samuel Pitoiset > --- > src/amd/vulkan/radv_shader.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c > index 31879805ae..a8d263d817 100644 > --- a/src/amd/vulkan/radv_shader.c > +++ b/src/amd/vulkan/radv_shader.c > @@ -421,8 +421,10 @@ radv_fill_shader_variant(struct radv_device *device, > gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. > */ > else if (info->uses_prim_id) > gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. > */ > + else if (variant->info.gs.vertices_in >= 3) > + gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 > */ > else > - gs_vgpr_comp_cnt = 1; /* TODO: use input_prim */ > + gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 > */ > > /* TODO: Figure out how many we actually need. */ > variant->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt); > -- > 2.15.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/6] radv: make shader BOs read-only for the GPU
Reviewed-by: Bas Nieuwenhuizenfor the series. On Thu, Jan 4, 2018 at 6:39 PM, Samuel Pitoiset wrote: > Signed-off-by: Samuel Pitoiset > --- > src/amd/vulkan/radv_device.c | 2 ++ > src/amd/vulkan/radv_private.h | 1 + > src/amd/vulkan/radv_shader.c | 5 - > 3 files changed, 7 insertions(+), 1 deletion(-) > > diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c > index 5f78af624b..528d3539c9 100644 > --- a/src/amd/vulkan/radv_device.c > +++ b/src/amd/vulkan/radv_device.c > @@ -263,6 +263,8 @@ radv_physical_device_init(struct radv_physical_device > *device, > */ > device->has_clear_state = device->rad_info.chip_class >= CIK; > > + device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= > VI; > + > radv_physical_device_init_mem_types(device); > > result = radv_init_wsi(device); > diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h > index bae353c0e5..6a19394ad5 100644 > --- a/src/amd/vulkan/radv_private.h > +++ b/src/amd/vulkan/radv_private.h > @@ -272,6 +272,7 @@ struct radv_physical_device { > bool has_rbplus; /* if RB+ register exist */ > bool rbplus_allowed; /* if RB+ is allowed */ > bool has_clear_state; > + bool cpdma_prefetch_writes_memory; > > /* This is the drivers on-disk cache used as a fallback as opposed to > * the pipeline cache defined by apps. > diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c > index ab8ba42511..74f2384a64 100644 > --- a/src/amd/vulkan/radv_shader.c > +++ b/src/amd/vulkan/radv_shader.c > @@ -330,7 +330,10 @@ radv_alloc_shader_memory(struct radv_device *device, > > slab->size = 256 * 1024; > slab->bo = device->ws->buffer_create(device->ws, slab->size, 256, > -RADEON_DOMAIN_VRAM, > RADEON_FLAG_NO_INTERPROCESS_SHARING); > +RADEON_DOMAIN_VRAM, > + > RADEON_FLAG_NO_INTERPROCESS_SHARING | > + > device->physical_device->cpdma_prefetch_writes_memory ? 
> +0 : > RADEON_FLAG_READ_ONLY); > slab->ptr = (char*)device->ws->buffer_map(slab->bo); > list_inithead(>shaders); > > -- > 2.15.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] radeonsi: use pknorm_i16/u16 and pk_i16/u16 LLVM intrinsics
On 01/05/2018 08:32 PM, Marek Olšák wrote: Here: https://cgit.freedesktop.org/~mareko/mesa/log/?h=master LLVM: https://reviews.llvm.org/D41663 Thanks! Looks good to me, no performance changes and no regressions as well. This patch is: Reviewed-by: Samuel PitoisetMarek On Fri, Jan 5, 2018 at 7:19 PM, Samuel Pitoiset wrote: On 01/05/2018 02:45 AM, Marek Olšák wrote: On Thu, Jan 4, 2018 at 10:25 AM, Samuel Pitoiset wrote: How about performance? Few weeks ago, I fixed a bug (5f81a43535e8512cef26ea3dcd1e3a489bd5a1bb) which affected F1 2017 and DOW3 on RADV, and it was also a nice performance boost, this is why I'm asking. No idea. This just decreases the number of instructions in some PS epilogs. Okay, the series no longer applies on master, do you have a branch somewhere? I would like to run, at least, F1 and DOW3. Marek On 01/04/2018 01:55 AM, Marek Olšák wrote: From: Marek Olšák --- src/amd/common/ac_llvm_build.c | 164 +++ src/amd/common/ac_llvm_build.h | 13 +++ src/gallium/drivers/radeonsi/si_shader.c | 152 3 files changed, 216 insertions(+), 113 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 7100e52..c48a186 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -61,20 +61,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context, ctx->voidt = LLVMVoidTypeInContext(ctx->context); ctx->i1 = LLVMInt1TypeInContext(ctx->context); ctx->i8 = LLVMInt8TypeInContext(ctx->context); ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); ctx->intptr = HAVE_32BIT_POINTERS ? 
ctx->i32 : ctx->i64; ctx->f16 = LLVMHalfTypeInContext(ctx->context); ctx->f32 = LLVMFloatTypeInContext(ctx->context); ctx->f64 = LLVMDoubleTypeInContext(ctx->context); + ctx->v2i16 = LLVMVectorType(ctx->i16, 2); ctx->v2i32 = LLVMVectorType(ctx->i32, 2); ctx->v3i32 = LLVMVectorType(ctx->i32, 3); ctx->v4i32 = LLVMVectorType(ctx->i32, 4); ctx->v2f32 = LLVMVectorType(ctx->f32, 2); ctx->v4f32 = LLVMVectorType(ctx->f32, 4); ctx->v8i32 = LLVMVectorType(ctx->i32, 8); ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false); ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false); ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0); @@ -1214,20 +1215,34 @@ LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a, } LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) { LLVMValueRef args[2] = {a, b}; return ac_build_intrinsic(ctx, "llvm.maxnum.f32", ctx->f32, args, 2, AC_FUNC_ATTR_READNONE); } +LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a, + LLVMValueRef b) +{ + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, ""); + return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); +} + +LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a, + LLVMValueRef b) +{ + LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, ""); + return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); +} + LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b) { LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, ""); return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); } LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value) { if (HAVE_LLVM >= 0x0500) { @@ -1439,20 +1454,169 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, v2f16, args, 2, AC_FUNC_ATTR_READNONE); return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); } return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args, 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_LEGACY); } +/* 
Upper 16 bits must be zero. */ +static LLVMValueRef ac_llvm_pack_two_int16(struct ac_llvm_context *ctx, + LLVMValueRef val[2]) +{ + return LLVMBuildOr(ctx->builder, val[0], + LLVMBuildShl(ctx->builder, val[1], + LLVMConstInt(ctx->i32, 16, 0), + ""), ""); +} +
Re: [Mesa-dev] Allocator Nouveau driver, Mesa EXT_external_objects, and DRM metadata import interfaces
Just wanted to clarify this one thing here, otherwise I think Rob/krh covered it all. On Thu, Dec 28, 2017 at 10:24:38AM -0800, Miguel Angel Vico wrote: > Daniel Vetter wrote: > > I think in the interim figuring out how to expose kms capabilities > > better (and necessarily standardizing at least some of them which > > matter at the compositor level, like size limits of framebuffers) > > feels like the place to push the ecosystem forward. In some way > > Miguel's proposal looks a bit backwards, since it adds the pitch > > capabilities to addfb, but at addfb time you've allocated everything > > already, so way too late to fix things up. With modifiers we've added > > a very simple per-plane property to list which modifiers can be > > combined with which pixel formats. Tiny start, but obviously very far > > from all that we'll need. > > Not sure whether I might be misunderstanding your statement, but one of > the allocator main features is negotiation of nearly optimal allocation > parameters given a set of uses on different devices/engines by the > capability merge operation. A client should have queried what every > device/engine is capable of for the given uses, find the optimal set of > capabilities, and use it for allocating a buffer. At the moment these > parameters are given to KMS, they are expected to be good. If they > aren't, the client didn't do things right. Your example code has a new capability for PITCH_ALIGNMENT. That looks wrong for addfb (which should only receive the computed intersection of all requirements, not the requirements themselves). And since that was the only thing in your example code besides the bare boilerplate to wire it all up it looks a bit confused. Maybe we need to distinguish capabilities into constraints on properties (like pitch alignment, or power-of-two pitch) and properties (like pitch) themselves.
-Daniel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] ac: rework emit_barrier() to not segfault on radeonsi
Reviewed-by: Samuel PitoisetOn 01/08/2018 07:45 AM, Timothy Arceri wrote: nir_to_llvm_context will always be NULL for radeonsi so we need work around this. --- src/amd/common/ac_nir_to_llvm.c | 17 - 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 5203b78537..a729fe5f6d 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -3826,19 +3826,18 @@ static void emit_membar(struct nir_to_llvm_context *ctx, ac_build_waitcnt(>ac, waitcnt); } -static void emit_barrier(struct nir_to_llvm_context *ctx) +static void emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage) { /* SI only (thanks to a hw bug workaround): * The real barrier instruction isn’t needed, because an entire patch * always fits into a single wave. */ - if (ctx->options->chip_class == SI && - ctx->stage == MESA_SHADER_TESS_CTRL) { - ac_build_waitcnt(>ac, LGKM_CNT & VM_CNT); + if (ac->chip_class == SI && stage == MESA_SHADER_TESS_CTRL) { + ac_build_waitcnt(ac, LGKM_CNT & VM_CNT); return; } - ac_build_intrinsic(>ac, "llvm.amdgcn.s.barrier", - ctx->ac.voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT); + ac_build_intrinsic(ac, "llvm.amdgcn.s.barrier", + ac->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT); } static void emit_discard_if(struct ac_nir_context *ctx, @@ -4331,7 +4330,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx, emit_membar(ctx->nctx, instr); break; case nir_intrinsic_barrier: - emit_barrier(ctx->nctx); + emit_barrier(>ac, ctx->stage); break; case nir_intrinsic_var_atomic_add: case nir_intrinsic_var_atomic_imin: @@ -6169,7 +6168,7 @@ write_tess_factors(struct nir_to_llvm_context *ctx) LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer; LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4]; int i; - emit_barrier(ctx); + emit_barrier(>ac, ctx->stage); switch (ctx->options->key.tcs.primitive_mode) { case GL_ISOLINES: @@ -6712,7 +6711,7 @@ LLVMModuleRef 
ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, } if (i) - emit_barrier(); + emit_barrier(, ctx.stage); ac_setup_rings(); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [GSOC] DriConf Replacement
Hello Michel, Finally I found some free time to spend on this. Can you please give me a small example of how I can get the device_ids? Currently to retrieve the available driver options I'm doing something like this: 1 - Count the number of screens using "ScreenCount" function from Xlib. 2 - For each screen retrieve the driver name with "glXGetScreenDriver". 3 - For each driver retrieve the available options with "glXGetDriverConfig". Please note that my knowledge about OpenGL, X11 and Mesa is almost zero, so if you have any documentation that I can read it would be nice too. Kind Regards, Jean Hertel - On 10/04/17 06:02 AM, Michel Dänzer wrote: >On 05/04/17 05:26 PM, Nicolai Hähnle wrote: >> On 04.04.2017 01:52, Jean Hertel wrote: >> >> 2c) Consider adding an option to configure PRIME to driconf. >> >> [2b and 2c will also require changes in Mesa; also, you may want to get >> rid of the implicit dependency on xdriinfo] > >FWIW, Mesa already supports 2c) with DRI3, via the device_id option. > > >-- >Earthling Michel Dänzer | http://www.amd.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/22] dri/common: Add option to allow exposure of 10 bpc color configs. (v2)
On 01/08/2018 01:55 AM, Steven Newbury wrote: On Fri, 2017-12-15 at 23:04 +0100, Mario Kleiner wrote: Some clients may not like RGB10X2 and RGB10A2 fbconfigs and visuals. Add a new driconf option 'allow_rgb10_configs' to allow per application enable/disable. The option defaults to enabled. v2: Rename expose_rgb10_configs to allow_rgb10_configs, as suggested by Emil. Add comment to option parsing, to make sure it stays before the ->InitScreen(). Signed-off-by: Mario KleinerReviewed-by: Tapani Pälli Reviewed-by: Marek Olšák --- src/mesa/drivers/dri/common/dri_util.c | 12 src/util/xmlpool/t_options.h | 5 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index d504751..d4fba0b 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -55,6 +55,10 @@ const char __dri2ConfigOptions[] = DRI_CONF_SECTION_PERFORMANCE DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_1) DRI_CONF_SECTION_END + + DRI_CONF_SECTION_MISCELLANEOUS + DRI_CONF_ALLOW_RGB10_CONFIGS("true") + DRI_CONF_SECTION_END DRI_CONF_END; This isn't exposing the driconf option for me with IVB HD4000. Adding the option to the same section in src/mesa/drivers/dri/i965/intel_screen.c did work though. Mind you having this default as true (which it does whether or not the option is available) is really bad here. On my LVDS display (which presumably isn't supporting 10bpc even if the chipset does, means by default my display colours are completely corrupted. What's more, the driconf option doesn't address Wayland compositors, for example GDM with Wayland doesn't respect the option but uses, I guess, a default config which happens to be 10bpc. I thought I knew how drirc works ... but seems like this is not the case. 
I thought a driver could override this option by introducing the same section in its own drirc, but that does not seem to work :/ I think we should set this globally to false for now (until the compositor issues are resolved) and also fix the option so that an individual driver can set this value. FYI, I filed a bug about this here: https://bugs.freedesktop.org/show_bug.cgi?id=104536 // Tapani ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/14] meson: fix missing dependencies
On 01/06/2018 01:36, Dylan Baker wrote: Quoting Greg V (2017-12-31 08:55:25) --- src/gallium/drivers/radeonsi/meson.build | 2 +- src/mesa/meson.build | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build index 58132bf072..8a7045de0d 100644 --- a/src/gallium/drivers/radeonsi/meson.build +++ b/src/gallium/drivers/radeonsi/meson.build @@ -77,7 +77,7 @@ libradeonsi = static_library( ], c_args : [c_vis_args], cpp_args : [cpp_vis_args], - dependencies : dep_llvm, + dependencies : [dep_llvm, dep_libdrm], ) I believe this could be correct, but I don't see this in autotools or scons, what is the error you're seeing? ../src/gallium/drivers/radeonsi/si_fence.c:25:10: fatal error: 'libsync.h' file not found driver_radeonsi = declare_dependency( diff --git a/src/mesa/meson.build b/src/mesa/meson.build index ab6bc27312..74b8d97231 100644 --- a/src/mesa/meson.build +++ b/src/mesa/meson.build @@ -721,6 +721,7 @@ libmesa_gallium = static_library( cpp_args : [cpp_vis_args, cpp_msvc_compat_args], include_directories : [inc_common, include_directories('main')], link_with : [libglsl, libmesa_sse41], + dependencies : [dep_vdpau], build_by_default : false, ) This is wrong, there is no way that core mesa needs to link against vdpau, what error are you fixing? ../src/gallium/include/state_tracker/vdpau_dmabuf.h:37:10: fatal error: 'vdpau/vdpau.h' file not found I'm not sure how to mark a dependency as "only for headers"… ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 08/14] meson: fix BSD build
On 01/06/2018 01:06, Dylan Baker wrote: Quoting Greg V (2017-12-31 08:55:22) +is_like_linux = host_machine.system() == 'linux' or host_machine.system().contains('bsd') # FIXME: illumos? This won't cover dragonflybsd, which is just 'dragonfly' I think something like this would be better: is_like_linux = ['openbsd', 'netbsd', 'freebsd', 'dragonfly', 'linux'].contains(host_machine.system()) Yeah, looks good to me. I'd like to know what illumos returns, but I can't get it to run in VM. If you have access to an illumos machine, getting the result of: `python3 -c "import platform; print(platform.system().lower())"` so we can add that to meson's support os values. I don't have a machine set up yet, but it's probably "sunos". There was some discussion in Meson about converting that to "solaris": https://github.com/mesonbuild/meson/issues/1578 As a nit, I'd prefer "system_is_linux_like" to just "is_like_linux" but I'm not set on that like the first change. This can be bikeshedded for a long long time :) Maybe something like "system_has_kms_drm"? ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 09/14] meson: do not redeclare pkg-config dependencies (yet)
On 01/06/2018 01:27, Dylan Baker wrote: We absolutely can't merge this as-is, it will break all of these since they'll end up linking the backend and frontend, which we're not supposed to do. The bug you linked to has nothing to do with the problem you describe either, that issue is that `dependency("threads")` is special in meson, and then evolved into a larger issue about the pkg-config generator. The actual problem here is real, but quite different. You can't use get_pkgconfig_variable to get the cflags. The actual workaround is to do something like: That is exactly the problem I've described. Looks like the linked bug starts with a different problem, but I linked it because it mentions "allow calling `get_pkgconfig_variable("Cflags")` and `get_pkgconfig_variable("Libs")`". prog_pkgconfig = find_program('pkg-config') dep_vdpau = declare_dependency( compile_args : run_command(prog_pkgconfig, ['vdpau', '--cflags']).stdout().split(), ) ... And we'll need a new meson feature for doing this without calling pkg-config directly. I'll write that feature. Yeah, that's great :) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev