Re: [Mesa-dev] [PATCH 2/4] radeonsi: use compute shaders for clear_buffer & copy_buffer
It looks like there is a deadlock in LLVM when 2 shaders are compiled at the same time. I don't know why it doesn't happen with OpenGL. Marek On Thu, Oct 18, 2018 at 3:32 AM Michel Dänzer wrote: > On 2018-10-17 6:43 p.m., Marek Olšák wrote: > > Can you test the attached patch? > > Doesn't help, unfortunately. Backtraces with the patch attached. > > FWIW, this is on Bonaire. > > > -- > Earthling Michel Dänzer | http://www.amd.com > Libre software enthusiast | Mesa and X developer > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/4] radeonsi: use compute shaders for clear_buffer & copy_buffer
On Thu, 2018-10-18 at 09:32 +0200, Michel Dänzer wrote: > On 2018-10-17 6:43 p.m., Marek Olšák wrote: > > Can you test the attached patch? > > Doesn't help, unfortunately. Backtraces with the patch attached. > > FWIW, this is on Bonaire. Hi, fwiw, I don't see this hang on my carrizo/iceland machine. Jan > > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: This is a digitally signed message part ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/4] radeonsi: use compute shaders for clear_buffer & copy_buffer
On 2018-10-17 6:43 p.m., Marek Olšák wrote: > Can you test the attached patch? Doesn't help, unfortunately. Backtraces with the patch attached. FWIW, this is on Bonaire. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer Thread 22 (Thread 0x7f2f6f7fe700 (LWP 7364)): #0 0x7f2fbcd03e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e398d1f2f0) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 #1 __pthread_cond_wait_common (abstime=0x0, mutex=0x55e398d1f2a0, cond=0x55e398d1f2c8) at pthread_cond_wait.c:502 #2 __pthread_cond_wait (cond=0x55e398d1f2c8, mutex=0x55e398d1f2a0) at pthread_cond_wait.c:655 #3 0x7f2fbb69dcfa in cnd_wait (mtx=0x55e398d1f2a0, cond=0x55e398d1f2c8) at ../../../include/c11/threads_posix.h:155 #4 util_queue_thread_func (input=input@entry=0x55e398d227f0) at ../../../src/util/u_queue.c:270 #5 0x7f2fbb69d8e8 in impl_thrd_routine (p=) at ../../../include/c11/threads_posix.h:87 #6 0x7f2fbccfdf2a in start_thread (arg=0x7f2f6f7fe700) at pthread_create.c:463 #7 0x7f2fc1d79edf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95 Thread 21 (Thread 0x7f2f6700 (LWP 7363)): #0 0x7f2fbcd03e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e398d1f2f0) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 #1 __pthread_cond_wait_common (abstime=0x0, mutex=0x55e398d1f2a0, cond=0x55e398d1f2c8) at pthread_cond_wait.c:502 #2 __pthread_cond_wait (cond=0x55e398d1f2c8, mutex=0x55e398d1f2a0) at pthread_cond_wait.c:655 #3 0x7f2fbb69dcfa in cnd_wait (mtx=0x55e398d1f2a0, cond=0x55e398d1f2c8) at ../../../include/c11/threads_posix.h:155 #4 util_queue_thread_func (input=input@entry=0x55e398d22640) at ../../../src/util/u_queue.c:270 #5 0x7f2fbb69d8e8 in impl_thrd_routine (p=) at ../../../include/c11/threads_posix.h:87 #6 0x7f2fbccfdf2a in start_thread (arg=0x7f2f6700) at pthread_create.c:463 #7 0x7f2fc1d79edf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95 Thread 20 (Thread 0x7f2f88ff9700 
(LWP 7362)): #0 0x7f2fbcd03e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e398d1f2f0) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 #1 __pthread_cond_wait_common (abstime=0x0, mutex=0x55e398d1f2a0, cond=0x55e398d1f2c8) at pthread_cond_wait.c:502 #2 __pthread_cond_wait (cond=0x55e398d1f2c8, mutex=0x55e398d1f2a0) at pthread_cond_wait.c:655 #3 0x7f2fbb69dcfa in cnd_wait (mtx=0x55e398d1f2a0, cond=0x55e398d1f2c8) at ../../../include/c11/threads_posix.h:155 #4 util_queue_thread_func (input=input@entry=0x55e398d22490) at ../../../src/util/u_queue.c:270 #5 0x7f2fbb69d8e8 in impl_thrd_routine (p=) at ../../../include/c11/threads_posix.h:87 #6 0x7f2fbccfdf2a in start_thread (arg=0x7f2f88ff9700) at pthread_create.c:463 #7 0x7f2fc1d79edf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95 Thread 19 (Thread 0x7f2f897fa700 (LWP 7361)): #0 0x7f2fbcd03e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e398d1f2f0) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 #1 __pthread_cond_wait_common (abstime=0x0, mutex=0x55e398d1f2a0, cond=0x55e398d1f2c8) at pthread_cond_wait.c:502 #2 __pthread_cond_wait (cond=0x55e398d1f2c8, mutex=0x55e398d1f2a0) at pthread_cond_wait.c:655 #3 0x7f2fbb69dcfa in cnd_wait (mtx=0x55e398d1f2a0, cond=0x55e398d1f2c8) at ../../../include/c11/threads_posix.h:155 #4 util_queue_thread_func (input=input@entry=0x55e398d222e0) at ../../../src/util/u_queue.c:270 #5 0x7f2fbb69d8e8 in impl_thrd_routine (p=) at ../../../include/c11/threads_posix.h:87 #6 0x7f2fbccfdf2a in start_thread (arg=0x7f2f897fa700) at pthread_create.c:463 #7 0x7f2fc1d79edf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95 Thread 18 (Thread 0x7f2f89ffb700 (LWP 7360)): #0 0x7f2fbcd03e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e398d1f2f0) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 #1 __pthread_cond_wait_common (abstime=0x0, mutex=0x55e398d1f2a0, cond=0x55e398d1f2c8) at pthread_cond_wait.c:502 #2 __pthread_cond_wait 
(cond=0x55e398d1f2c8, mutex=0x55e398d1f2a0) at pthread_cond_wait.c:655 #3 0x7f2fbb69dcfa in cnd_wait (mtx=0x55e398d1f2a0, cond=0x55e398d1f2c8) at ../../../include/c11/threads_posix.h:155 #4 util_queue_thread_func (input=input@entry=0x55e398d238c0) at ../../../src/util/u_queue.c:270 #5 0x7f2fbb69d8e8 in impl_thrd_routine (p=) at ../../../include/c11/threads_posix.h:87 #6 0x7f2fbccfdf2a in start_thread (arg=0x7f2f89ffb700) at pthread_create.c:463 #7 0x7f2fc1d79edf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95 Thread 17 (Thread 0x7f2f8a7fc700 (LWP 7359)): #0 0x7f2fbcd03e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e398d1ed70) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 #1
Re: [Mesa-dev] [PATCH 2/4] radeonsi: use compute shaders for clear_buffer & copy_buffer
Can you test the attached patch? Marek On Wed, Oct 17, 2018 at 9:31 AM Michel Dänzer wrote: > On 2018-10-07 9:05 a.m., Marek Olšák wrote: > > From: Marek Olšák > > > > Fast color clears should be much faster. Also, fast color clears on > > evicted buffers should be 200x faster on GFX8 and older. > > Nice! Unfortunately, this broke clover with radeonsi. Everything using > OpenCL seems to hang, see e.g. the attached backtraces from clinfo. > > > -- > Earthling Michel Dänzer | http://www.amd.com > Libre software enthusiast | Mesa and X developer > From f0978b2afae808edf4ac281b14cd371305a5164b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 17 Oct 2018 12:41:38 -0400 Subject: [PATCH] radeonsi: fix a deadlock due to partially-initialized context on CI --- src/gallium/drivers/radeonsi/si_pipe.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 59e41c53300..06740bd0f5c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -575,12 +575,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, &sctx->null_const_buf); si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &sctx->null_const_buf); - - /* Clear the NULL constant buffer, because loads should return zeros. */ - uint32_t clear_value = 0; - si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0, -sctx->null_const_buf.buffer->width0, -&clear_value, 4, SI_COHERENCY_SHADER); } uint64_t max_threads_per_block; @@ -625,6 +619,14 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, /* this must be last */ si_begin_new_gfx_cs(sctx); + + if (sctx->chip_class == CIK) { + /* Clear the NULL constant buffer, because loads should return zeros. 
*/ + uint32_t clear_value = 0; + si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0, +sctx->null_const_buf.buffer->width0, +&clear_value, 4, SI_COHERENCY_SHADER); + } return &sctx->b; fail: fprintf(stderr, "radeonsi: Failed to create a context.\n"); -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/4] radeonsi: use compute shaders for clear_buffer & copy_buffer
On 2018-10-07 9:05 a.m., Marek Olšák wrote: > From: Marek Olšák > > Fast color clears should be much faster. Also, fast color clears on > evicted buffers should be 200x faster on GFX8 and older. Nice! Unfortunately, this broke clover with radeonsi. Everything using OpenCL seems to hang, see e.g. the attached backtraces from clinfo. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer (gdb) info threads Id Target Id Frame * 1Thread 0x7f63ecdb2740 (LWP 24202) "clinfo" syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38 2Thread 0x7f63e62bc700 (LWP 24203) "clinfo:rcs0" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915203af0) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 3Thread 0x7f63e5934700 (LWP 24204) "clinfo:disk$0" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915204768) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 4Thread 0x7f63e510a700 (LWP 24205) "clinfo:cs0" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915214aa0) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 5Thread 0x7f63d7fff700 (LWP 24206) "clinfo:disk$0" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e9152185a8) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 6Thread 0x7f63d77fe700 (LWP 24207) "clinfo:sh0" __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135 7Thread 0x7f63d6ffd700 (LWP 24208) "clinfo:sh1" __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135 8Thread 0x7f63c700 (LWP 24209) "clinfo:sh2" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915217d00) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 9Thread 0x7f63d67fc700 (LWP 24210) "clinfo:sh3" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915217d00) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 10 Thread 0x7f63d5ffb700 (LWP 24211) 
"clinfo:sh4" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915217d00) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 11 Thread 0x7f63d57fa700 (LWP 24212) "clinfo:sh5" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915217d00) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 12 Thread 0x7f63d4ff9700 (LWP 24213) "clinfo:sh6" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915217d00) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 13 Thread 0x7f63cf7fe700 (LWP 24214) "clinfo:sh7" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915217d00) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 14 Thread 0x7f63ceffd700 (LWP 24215) "clinfo:sh8" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915217d00) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 15 Thread 0x7f63ce7fc700 (LWP 24216) "clinfo:sh9" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915217d00) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 16 Thread 0x7f63cdffb700 (LWP 24217) "clinfo:sh10" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915217d00) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 17 Thread 0x7f63cd7fa700 (LWP 24218) "clinfo:sh11" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915217d00) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 18 Thread 0x7f63ccff9700 (LWP 24219) "clinfo:shlo0" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915218280) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 19 Thread 0x7f639bfff700 (LWP 24220) "clinfo:shlo1" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915218280) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 20 Thread 0x7f639b7fe700 (LWP 24221) "clinfo:shlo2" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, 
futex_word=0x55e915218280) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 21 Thread 0x7f639affd700 (LWP 24222) "clinfo:shlo3" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915218280) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 22 Thread 0x7f639a7fc700 (LWP 24223) "clinfo:shlo4" 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915218280) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 (gdb) thread apply all bt Thread 22 (Thread 0x7f639a7fc700 (LWP 24223)): #0 0x7f63e7e36e6c in futex_wait_cancelable (private=, expected=0, futex_word=0x55e915218280) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 #1 __pthread_cond_wait_common
[Mesa-dev] [PATCH 2/4] radeonsi: use compute shaders for clear_buffer & copy_buffer
From: Marek Olšák Fast color clears should be much faster. Also, fast color clears on evicted buffers should be 200x faster on GFX8 and older. --- src/gallium/drivers/radeonsi/Makefile.sources | 1 + src/gallium/drivers/radeonsi/meson.build | 1 + src/gallium/drivers/radeonsi/si_clear.c | 10 +- .../drivers/radeonsi/si_compute_blit.c| 285 ++ src/gallium/drivers/radeonsi/si_cp_dma.c | 180 +-- src/gallium/drivers/radeonsi/si_pipe.c| 22 +- src/gallium/drivers/radeonsi/si_pipe.h| 51 ++-- src/gallium/drivers/radeonsi/si_test_dma.c| 3 +- 8 files changed, 350 insertions(+), 203 deletions(-) create mode 100644 src/gallium/drivers/radeonsi/si_compute_blit.c diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index abdc4e07f1e..aeb9b7982c4 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -4,20 +4,21 @@ GENERATED_SOURCES := \ C_SOURCES := \ $(GENERATED_SOURCES) \ cik_sdma.c \ driinfo_radeonsi.h \ si_blit.c \ si_buffer.c \ si_build_pm4.h \ si_clear.c \ si_compute.c \ si_compute.h \ + si_compute_blit.c \ si_cp_dma.c \ si_debug.c \ si_descriptors.c \ si_dma.c \ si_dma_cs.c \ si_fence.c \ si_get.c \ si_gfx_cs.c \ si_gpu_load.c \ si_pipe.c \ diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build index 4d6044f724b..2542f136d11 100644 --- a/src/gallium/drivers/radeonsi/meson.build +++ b/src/gallium/drivers/radeonsi/meson.build @@ -20,20 +20,21 @@ files_libradeonsi = files( 'cik_sdma.c', 'driinfo_radeonsi.h', 'si_blit.c', 'si_buffer.c', 'si_build_pm4.h', 'si_clear.c', 'si_compute.c', 'si_compute.h', + 'si_compute_blit.c', 'si_cp_dma.c', 'si_debug.c', 'si_descriptors.c', 'si_dma.c', 'si_dma_cs.c', 'si_fence.c', 'si_get.c', 'si_gfx_cs.c', 'si_gpu_load.c', 'si_perfcounter.c', diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index 4e07de81bac..520e5b94f4a 100644 --- 
a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -249,21 +249,21 @@ void vi_dcc_clear_level(struct si_context *sctx, * would be more efficient than separate per-layer clear operations. */ assert(tex->buffer.b.b.nr_storage_samples <= 2 || num_layers == 1); dcc_offset += tex->surface.u.legacy.level[level].dcc_offset; clear_size = tex->surface.u.legacy.level[level].dcc_fast_clear_size * num_layers; } si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size, - clear_value, SI_COHERENCY_CB_META); + &clear_value, 4, SI_COHERENCY_CB_META); } /* Set the same micro tile mode as the destination of the last MSAA resolve. * This allows hitting the MSAA resolve fast path, which requires that both * src and dst micro tile modes match. */ static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, struct si_texture *tex) { if (tex->buffer.b.is_shared || @@ -480,23 +480,24 @@ static void si_do_fast_color_clear(struct si_context *sctx, if (eliminate_needed && too_small) continue; /* DCC fast clear with MSAA should clear CMASK to 0xC. */ if (tex->buffer.b.b.nr_samples >= 2 && tex->cmask_buffer) { /* TODO: This doesn't work with MSAA. */ if (eliminate_needed) continue; + uint32_t clear_value = 0xCCCCCCCC; si_clear_buffer(sctx, &tex->cmask_buffer->b.b, tex->cmask_offset, tex->surface.cmask_size, - 0xCCCCCCCC, SI_COHERENCY_CB_META); + &clear_value, 4, SI_COHERENCY_CB_META); need_decompress_pass = true; } vi_dcc_clear_level(sctx, tex, 0, reset_value); if (eliminate_needed) need_decompress_pass = true; tex->separate_dcc_dirty = true; } else { @@ -511,23 +512,24 @@ static void si_do_fast_color_clear(struct si_context *sctx, /* RB+ doesn't work with CMASK fast clear on Stoney. */ if (sctx->family == CHIP_STONEY) continue;