On 24 February 2015 at 09:01, Maarten Lankhorst
<[email protected]> wrote:
> While I've closed off most races in a previous patch, a small race still
> existed
> where importing then unreffing cound cause an invalid bo. Add a test for this
> case.
>
> Racing sequence fixed:
>
> - thread 1 releases bo, refcount drops to zero, blocks on acquiring
> nvdev->lock.
> - thread 2 increases refcount to 1.
> - thread 2 decreases refcount to zero, blocks on acquiring nvdev->lock.
>
> At this point the 2 threads will clean up the same bo.
>
> How is this fixed? When thread 2 increases refcount to 1 it removes
> the bo from the list, and creates a new bo. The original thread
> will notice refcount was increased to 1 and skip deletion from list
> and closing the handle.
>
> Signed-off-by: Maarten Lankhorst <[email protected]>
> ---
> configure.ac | 1 +
> nouveau/nouveau.c | 69 +++++++++++------------
> tests/Makefile.am | 4 ++
> tests/nouveau/.gitignore | 1 +
> tests/nouveau/Makefile.am | 15 +++++
> tests/nouveau/threaded.c | 140
> ++++++++++++++++++++++++++++++++++++++++++++++
> xf86atomic.h | 6 +-
> 7 files changed, 198 insertions(+), 38 deletions(-)
> create mode 100644 tests/nouveau/.gitignore
> create mode 100644 tests/nouveau/Makefile.am
> create mode 100644 tests/nouveau/threaded.c
>
> diff --git a/configure.ac b/configure.ac
> index 8afee83..6dc5044 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -441,6 +441,7 @@ AC_CONFIG_FILES([
> tests/vbltest/Makefile
> tests/exynos/Makefile
> tests/tegra/Makefile
> + tests/nouveau/Makefile
> man/Makefile
> libdrm.pc])
> AC_OUTPUT
> diff --git a/nouveau/nouveau.c b/nouveau/nouveau.c
> index c6c153a..1c723b9 100644
> --- a/nouveau/nouveau.c
> +++ b/nouveau/nouveau.c
> @@ -351,29 +351,18 @@ nouveau_bo_del(struct nouveau_bo *bo)
>
> pthread_mutex_lock(&nvdev->lock);
> if (nvbo->name) {
> - if (atomic_read(&nvbo->refcnt)) {
> + if (atomic_read(&nvbo->refcnt) == 0) {
> + DRMLISTDEL(&nvbo->head);
> /*
> - * bo has been revived by a race with
> - * nouveau_bo_prime_handle_ref, or
> nouveau_bo_name_ref.
> - *
> - * In theory there's still a race possible with
> - * nouveau_bo_wrap, but when using this function
> - * the lifetime of the handle is probably already
> - * handled in another way. If there are races
> - * you're probably using nouveau_bo_wrap wrong.
> + * This bo has to be closed with the lock held because
> + * gem handles are not refcounted. If a shared bo is
> + * closed and re-opened in another thread a race
> + * against DRM_IOCTL_GEM_OPEN or drmPrimeFDToHandle
> + * might cause the bo to be closed accidentally while
> + * re-importing.
> */
> - pthread_mutex_unlock(&nvdev->lock);
> - return;
> + drmIoctl(bo->device->fd, DRM_IOCTL_GEM_CLOSE, &req);
> }
> - DRMLISTDEL(&nvbo->head);
> - /*
> - * This bo has to be closed with the lock held because gem
> - * handles are not refcounted. If a shared bo is closed and
> - * re-opened in another thread a race against
> - * DRM_IOCTL_GEM_OPEN or drmPrimeFDToHandle might cause the
> - * bo to be closed accidentally while re-importing.
> - */
> - drmIoctl(bo->device->fd, DRM_IOCTL_GEM_CLOSE, &req);
> pthread_mutex_unlock(&nvdev->lock);
> } else {
> DRMLISTDEL(&nvbo->head);
> @@ -418,7 +407,7 @@ nouveau_bo_new(struct nouveau_device *dev, uint32_t
> flags, uint32_t align,
>
> static int
> nouveau_bo_wrap_locked(struct nouveau_device *dev, uint32_t handle,
> - struct nouveau_bo **pbo)
> + struct nouveau_bo **pbo, int name)
> {
> struct nouveau_device_priv *nvdev = nouveau_device(dev);
> struct drm_nouveau_gem_info req = { .handle = handle };
> @@ -427,8 +416,24 @@ nouveau_bo_wrap_locked(struct nouveau_device *dev,
> uint32_t handle,
>
> DRMLISTFOREACHENTRY(nvbo, &nvdev->bo_list, head) {
> if (nvbo->base.handle == handle) {
> - *pbo = NULL;
> - nouveau_bo_ref(&nvbo->base, pbo);
> + if (atomic_inc_return(&nvbo->refcnt) == 1) {
> + /*
> + * Uh oh, this bo is dead and someone else
> + * will free it, but because refcnt is
> + * now non-zero fortunately they won't
> + * call the ioctl to close the bo.
> + *
> + * Remove this bo from the list so other
> + * calls to nouveau_bo_wrap_locked will
> + * see our replacement nvbo.
> + */
> + DRMLISTDEL(&nvbo->head);
> + if (!name)
> + name = nvbo->name;
> + break;
> + }
> +
> + *pbo = &nvbo->base;
> return 0;
> }
> }
> @@ -443,6 +448,7 @@ nouveau_bo_wrap_locked(struct nouveau_device *dev,
> uint32_t handle,
> atomic_set(&nvbo->refcnt, 1);
> nvbo->base.device = dev;
> abi16_bo_info(&nvbo->base, &req);
> + nvbo->name = name;
> DRMLISTADD(&nvbo->head, &nvdev->bo_list);
> *pbo = &nvbo->base;
> return 0;
> @@ -458,7 +464,7 @@ nouveau_bo_wrap(struct nouveau_device *dev, uint32_t
> handle,
> struct nouveau_device_priv *nvdev = nouveau_device(dev);
> int ret;
> pthread_mutex_lock(&nvdev->lock);
> - ret = nouveau_bo_wrap_locked(dev, handle, pbo);
> + ret = nouveau_bo_wrap_locked(dev, handle, pbo, 0);
> pthread_mutex_unlock(&nvdev->lock);
> return ret;
> }
> @@ -468,24 +474,13 @@ nouveau_bo_name_ref(struct nouveau_device *dev,
> uint32_t name,
> struct nouveau_bo **pbo)
> {
> struct nouveau_device_priv *nvdev = nouveau_device(dev);
> - struct nouveau_bo_priv *nvbo;
> struct drm_gem_open req = { .name = name };
> int ret;
>
> pthread_mutex_lock(&nvdev->lock);
> - DRMLISTFOREACHENTRY(nvbo, &nvdev->bo_list, head) {
> - if (nvbo->name == name) {
> - *pbo = NULL;
> - nouveau_bo_ref(&nvbo->base, pbo);
> - pthread_mutex_unlock(&nvdev->lock);
> - return 0;
> - }
> - }
> -
> ret = drmIoctl(dev->fd, DRM_IOCTL_GEM_OPEN, &req);
> if (ret == 0) {
> - ret = nouveau_bo_wrap_locked(dev, req.handle, pbo);
> - nouveau_bo((*pbo))->name = name;
> + ret = nouveau_bo_wrap_locked(dev, req.handle, pbo, name);
> }
>
> pthread_mutex_unlock(&nvdev->lock);
> @@ -537,7 +532,7 @@ nouveau_bo_prime_handle_ref(struct nouveau_device *dev,
> int prime_fd,
> pthread_mutex_lock(&nvdev->lock);
> ret = drmPrimeFDToHandle(dev->fd, prime_fd, &handle);
> if (ret == 0) {
> - ret = nouveau_bo_wrap_locked(dev, handle, bo);
> + ret = nouveau_bo_wrap_locked(dev, handle, bo, 0);
> if (!ret) {
> struct nouveau_bo_priv *nvbo = nouveau_bo(*bo);
> if (!nvbo->name) {
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 37b8d3a..38e4094 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -28,6 +28,10 @@ if HAVE_TEGRA
> SUBDIRS += tegra
> endif
>
> +if HAVE_NOUVEAU
> +SUBDIRS += nouveau
> +endif
> +
> if HAVE_LIBUDEV
>
> check_LTLIBRARIES = libdrmtest.la
> diff --git a/tests/nouveau/.gitignore b/tests/nouveau/.gitignore
> new file mode 100644
> index 0000000..837bfb9
> --- /dev/null
> +++ b/tests/nouveau/.gitignore
> @@ -0,0 +1 @@
> +threaded
> diff --git a/tests/nouveau/Makefile.am b/tests/nouveau/Makefile.am
> new file mode 100644
> index 0000000..8e3392e
> --- /dev/null
> +++ b/tests/nouveau/Makefile.am
> @@ -0,0 +1,15 @@
> +AM_CPPFLAGS = \
> + -I$(top_srcdir)/include/drm \
> + -I$(top_srcdir)/nouveau \
> + -I$(top_srcdir)
> +
> +AM_CFLAGS = -Wall -Werror
Please use $(WARN_CFLAGS)
> +AM_LDFLAGS = -pthread
> +
I assume that adding -lpthread to the LDADD below will be better, but
I'm not 100% sure.
> +LDADD = -ldl \
> + ../../nouveau/libdrm_nouveau.la \
> + ../../libdrm.la
> +
Move -ldl at the bottom of the list so that it doesn't bite us in the future.
> +noinst_PROGRAMS = \
> + threaded
If you want you can add this to the installed tests, and/or make check
> +
> diff --git a/tests/nouveau/threaded.c b/tests/nouveau/threaded.c
> new file mode 100644
> index 0000000..490a7fa
> --- /dev/null
> +++ b/tests/nouveau/threaded.c
> @@ -0,0 +1,140 @@
> +/*
> + * Copyright © 2015 Canonical Ltd. (Maarten Lankhorst)
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +#ifdef HAVE_CONFIG_H
> +# include "config.h"
> +#endif
> +
> +#include <sys/ioctl.h>
> +#include <dlfcn.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <pthread.h>
> +
> +#include "xf86drm.h"
> +#include "nouveau.h"
> +
> +static const char default_device[] = "/dev/dri/renderD128";
> +
Reuse the defines in xf86drm.h ?
> +static typeof(ioctl) *old_ioctl;
> +static int failed;
> +
> +static int import_fd;
> +
> +int ioctl(int fd, unsigned long request, ...)
> +{
> + va_list va;
> + int ret;
> + void *arg;
> +
> + va_start(va, request);
> + arg = va_arg(va, void *);
> + ret = old_ioctl(fd, request, arg);
> + va_end(va);
> +
> + if (ret < 0 && request == DRM_IOCTL_GEM_CLOSE && errno == EINVAL)
> + failed = 1;
> +
> + return ret;
> +}
> +
> +static void *
> +openclose(void *dev)
> +{
> + struct nouveau_device *nvdev = dev;
> + struct nouveau_bo *bo = NULL;
> + int i;
> +
> + for (i = 0; i < 100000; ++i) {
> + if (!nouveau_bo_prime_handle_ref(nvdev, import_fd, &bo))
> + nouveau_bo_ref(NULL, &bo);
> + }
> + return NULL;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + drmVersionPtr version;
> + const char *device;
> + int err, fd, fd2;
> + struct nouveau_device *nvdev, *nvdev2;
> + struct nouveau_bo *bo;
> +
> + old_ioctl = dlsym(RTLD_NEXT, "ioctl");
> +
> + pthread_t t1, t2;
> +
> + if (argc < 2)
> + device = default_device;
> + else
> + device = argv[1];
> +
> + fd = open(device, O_RDWR);
> + fd2 = open(device, O_RDWR);
> + if (fd < 0 || fd2 < 0)
> + return 1;
> +
> + version = drmGetVersion(fd);
> + if (version) {
> + printf("Version: %d.%d.%d\n", version->version_major,
> + version->version_minor, version->version_patchlevel);
> + printf(" Name: %s\n", version->name);
> + printf(" Date: %s\n", version->date);
> + printf(" Description: %s\n", version->desc);
> +
> + drmFreeVersion(version);
> + }
> +
> + err = nouveau_device_wrap(fd, 0, &nvdev);
> + if (!err)
> + err = nouveau_device_wrap(fd2, 0, &nvdev2);
> + if (err < 0)
> + return 1;
> +
> + err = nouveau_bo_new(nvdev2, NOUVEAU_BO_GART, 0, 4096, NULL, &bo);
> + if (!err)
> + err = nouveau_bo_set_prime(bo, &import_fd);
> +
> + if (!err) {
> + pthread_create(&t1, NULL, openclose, nvdev);
> + pthread_create(&t2, NULL, openclose, nvdev);
> + }
> +
> + pthread_join(t1, NULL);
> + pthread_join(t2, NULL);
> +
> + close(import_fd);
> + nouveau_bo_ref(NULL, &bo);
> +
> + nouveau_device_del(&nvdev2);
> + nouveau_device_del(&nvdev);
> + close(fd2);
> + close(fd);
> +
> + if (failed)
> + fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed with EINVAL,\n"
> + "race in opening/closing bo is likely.\n");
> +
> + return failed;
> +}
> diff --git a/xf86atomic.h b/xf86atomic.h
> index 8c4b696..66a8d9a 100644
> --- a/xf86atomic.h
> +++ b/xf86atomic.h
> @@ -49,7 +49,8 @@ typedef struct {
> # define atomic_read(x) ((x)->atomic)
> # define atomic_set(x, val) ((x)->atomic = (val))
> # define atomic_inc(x) ((void) __sync_fetch_and_add (&(x)->atomic, 1))
> -# define atomic_dec_and_test(x) (__sync_fetch_and_add (&(x)->atomic, -1) ==
> 1)
> +# define atomic_inc_return(x) (__sync_add_and_fetch (&(x)->atomic, 1))
> +# define atomic_dec_and_test(x) (__sync_add_and_fetch (&(x)->atomic, -1) ==
> 0)
The atomic_dec_and_test change seems like unrelated bugfix. Split it
out perhaps ?
Introduction of atomic_inc_return could/should be a separate commit as well.
> # define atomic_add(x, v) ((void) __sync_add_and_fetch(&(x)->atomic, (v)))
> # define atomic_dec(x, v) ((void) __sync_sub_and_fetch(&(x)->atomic, (v)))
> # define atomic_cmpxchg(x, oldv, newv) __sync_val_compare_and_swap
> (&(x)->atomic, oldv, newv)
> @@ -68,6 +69,7 @@ typedef struct {
> # define atomic_read(x) AO_load_full(&(x)->atomic)
> # define atomic_set(x, val) AO_store_full(&(x)->atomic, (val))
> # define atomic_inc(x) ((void) AO_fetch_and_add1_full(&(x)->atomic))
> +# define atomic_inc_return(x) (AO_fetch_and_add1_full(&(x)->atomic) + 1)
> # define atomic_add(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, (v)))
> # define atomic_dec(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, -(v)))
> # define atomic_dec_and_test(x) (AO_fetch_and_sub1_full(&(x)->atomic) == 1)
> @@ -91,6 +93,7 @@ typedef struct { LIBDRM_ATOMIC_TYPE atomic; } atomic_t;
> # define atomic_read(x) (int) ((x)->atomic)
> # define atomic_set(x, val) ((x)->atomic = (LIBDRM_ATOMIC_TYPE)(val))
> # define atomic_inc(x) (atomic_inc_uint (&(x)->atomic))
> +# define atomic_inc_return (atomic_inc_uint_nv(&(x)->atomic))
> # define atomic_dec_and_test(x) (atomic_dec_uint_nv(&(x)->atomic) == 0)
> # define atomic_add(x, v) (atomic_add_int(&(x)->atomic, (v)))
> # define atomic_dec(x, v) (atomic_add_int(&(x)->atomic, -(v)))
> @@ -112,3 +115,4 @@ static inline int atomic_add_unless(atomic_t *v, int add,
> int unless)
> }
>
> #endif
> +
Extra white-space.
IMHO sending the series for dri-devel might be nice, for at least the
xf86atomic.h changes.
Cheers
Emil
P.S. Bless you dude for going through the lovely experience to fix this.
_______________________________________________
Nouveau mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/nouveau