On Tue, Aug 20, 2019 at 10:29:58AM -0700, Bart Van Assche wrote:
> Order head and tail stores properly against CQE / SQE memory accesses.
> Use <asm/barrier.h> instead of the io_uring "barrier.h" header file.

Where does this header file come from?

Linux has an asm-generic/barrier.h file which is not uapi and therefore
not installed in /usr/include.

I couldn't find an asm/barrier.h in the Debian packages collection
either.

> 
> Cc: Roman Penyaev <[email protected]>
> Cc: Stefan Hajnoczi <[email protected]>
> Signed-off-by: Bart Van Assche <[email protected]>
> ---
>  tools/io_uring/Makefile         |  2 +-
>  tools/io_uring/barrier.h        | 16 ---------------
>  tools/io_uring/io_uring-bench.c | 20 +++++++++---------
>  tools/io_uring/liburing.h       |  9 ++++-----
>  tools/io_uring/queue.c          | 36 +++++++++++----------------------
>  5 files changed, 26 insertions(+), 57 deletions(-)
>  delete mode 100644 tools/io_uring/barrier.h
> 
> diff --git a/tools/io_uring/Makefile b/tools/io_uring/Makefile
> index 00f146c54c53..bbec91c6274c 100644
> --- a/tools/io_uring/Makefile
> +++ b/tools/io_uring/Makefile
> @@ -1,6 +1,6 @@
>  # SPDX-License-Identifier: GPL-2.0
>  # Makefile for io_uring test tools
> -CFLAGS += -Wall -Wextra -g -D_GNU_SOURCE
> +CFLAGS += -Wall -Wextra -g -D_GNU_SOURCE -I../include
>  LDLIBS += -lpthread
>  
>  all: io_uring-cp io_uring-bench
> diff --git a/tools/io_uring/barrier.h b/tools/io_uring/barrier.h
> deleted file mode 100644
> index ef00f6722ba9..000000000000
> --- a/tools/io_uring/barrier.h
> +++ /dev/null
> @@ -1,16 +0,0 @@
> -#ifndef LIBURING_BARRIER_H
> -#define LIBURING_BARRIER_H
> -
> -#if defined(__x86_64) || defined(__i386__)
> -#define read_barrier()       __asm__ __volatile__("":::"memory")
> -#define write_barrier()      __asm__ __volatile__("":::"memory")
> -#else
> -/*
> - * Add arch appropriate definitions. Be safe and use full barriers for
> - * archs we don't have support for.
> - */
> -#define read_barrier()       __sync_synchronize()
> -#define write_barrier()      __sync_synchronize()
> -#endif
> -
> -#endif
> diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c
> index 0f257139b003..29971a2a1c74 100644
> --- a/tools/io_uring/io_uring-bench.c
> +++ b/tools/io_uring/io_uring-bench.c
> @@ -28,9 +28,9 @@
>  #include <string.h>
>  #include <pthread.h>
>  #include <sched.h>
> +#include <asm/barrier.h>
>  
>  #include "liburing.h"
> -#include "barrier.h"
>  
>  #define min(a, b)            ((a < b) ? (a) : (b))
>  
> @@ -199,8 +199,7 @@ static int prep_more_ios(struct submitter *s, unsigned 
> max_ios)
>       next_tail = tail = *ring->tail;
>       do {
>               next_tail++;
> -             read_barrier();
> -             if (next_tail == *ring->head)
> +             if (next_tail == smp_load_acquire(ring->head))
>                       break;
>  
>               index = tail & sq_ring_mask;
> @@ -211,10 +210,11 @@ static int prep_more_ios(struct submitter *s, unsigned 
> max_ios)
>       } while (prepped < max_ios);
>  
>       if (*ring->tail != tail) {
> -             /* order tail store with writes to sqes above */
> -             write_barrier();
> -             *ring->tail = tail;
> -             write_barrier();
> +             /*
> +              * Ensure that the kernel sees the SQE updates before it sees
> +              * the tail update.
> +              */
> +             smp_store_release(ring->tail, tail);
>       }
>       return prepped;
>  }
> @@ -251,8 +251,7 @@ static int reap_events(struct submitter *s)
>       do {
>               struct file *f;
>  
> -             read_barrier();
> -             if (head == *ring->tail)
> +             if (head == smp_load_acquire(ring->tail))
>                       break;
>               cqe = &ring->cqes[head & cq_ring_mask];
>               if (!do_nop) {
> @@ -270,8 +269,7 @@ static int reap_events(struct submitter *s)
>       } while (1);
>  
>       s->inflight -= reaped;
> -     *ring->head = head;
> -     write_barrier();
> +     smp_store_release(ring->head, head);
>       return reaped;
>  }
>  
> diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h
> index 5f305c86b892..15b29bfac811 100644
> --- a/tools/io_uring/liburing.h
> +++ b/tools/io_uring/liburing.h
> @@ -10,7 +10,7 @@ extern "C" {
>  #include <string.h>
>  #include "../../include/uapi/linux/io_uring.h"
>  #include <inttypes.h>
> -#include "barrier.h"
> +#include <asm/barrier.h>
>  
>  /*
>   * Library interface to io_uring
> @@ -82,12 +82,11 @@ static inline void io_uring_cqe_seen(struct io_uring 
> *ring,
>       if (cqe) {
>               struct io_uring_cq *cq = &ring->cq;
>  
> -             (*cq->khead)++;
>               /*
> -              * Ensure that the kernel sees our new head, the kernel has
> -              * the matching read barrier.
> +              * Ensure that the kernel only sees the new value of the head
> +              * index after the CQEs have been read.
>                */
> -             write_barrier();
> +             smp_store_release(cq->khead, *cq->khead + 1);
>       }
>  }
>  
> diff --git a/tools/io_uring/queue.c b/tools/io_uring/queue.c
> index 321819c132c7..ada05bc74361 100644
> --- a/tools/io_uring/queue.c
> +++ b/tools/io_uring/queue.c
> @@ -4,9 +4,9 @@
>  #include <unistd.h>
>  #include <errno.h>
>  #include <string.h>
> +#include <asm/barrier.h>
>  
>  #include "liburing.h"
> -#include "barrier.h"
>  
>  static int __io_uring_get_cqe(struct io_uring *ring,
>                             struct io_uring_cqe **cqe_ptr, int wait)
> @@ -20,14 +20,12 @@ static int __io_uring_get_cqe(struct io_uring *ring,
>       head = *cq->khead;
>       do {
>               /*
> -              * It's necessary to use a read_barrier() before reading
> -              * the CQ tail, since the kernel updates it locklessly. The
> -              * kernel has the matching store barrier for the update. The
> -              * kernel also ensures that previous stores to CQEs are ordered
> +              * It's necessary to use smp_load_acquire() to read the CQ
> +              * tail. The kernel uses smp_store_release() for updating the
> +              * CQ tail to ensure that previous stores to CQEs are ordered
>                * with the tail update.
>                */
> -             read_barrier();
> -             if (head != *cq->ktail) {
> +             if (head != smp_load_acquire(cq->ktail)) {
>                       *cqe_ptr = &cq->cqes[head & mask];
>                       break;
>               }
> @@ -73,12 +71,11 @@ int io_uring_submit(struct io_uring *ring)
>       int ret;
>  
>       /*
> -      * If we have pending IO in the kring, submit it first. We need a
> -      * read barrier here to match the kernels store barrier when updating
> -      * the SQ head.
> +      * If we have pending IO in the kring, submit it first. We need
> +      * smp_load_acquire() here to match the kernels smp_store_release()
> +      * when updating the SQ head.
>        */
> -     read_barrier();
> -     if (*sq->khead != *sq->ktail) {
> +     if (smp_load_acquire(sq->khead) != *sq->ktail) {
>               submitted = *sq->kring_entries;
>               goto submit;
>       }
> @@ -94,7 +91,6 @@ int io_uring_submit(struct io_uring *ring)
>       to_submit = sq->sqe_tail - sq->sqe_head;
>       while (to_submit--) {
>               ktail_next++;
> -             read_barrier();
>  
>               sq->array[ktail & mask] = sq->sqe_head & mask;
>               ktail = ktail_next;
> @@ -108,18 +104,10 @@ int io_uring_submit(struct io_uring *ring)
>  
>       if (*sq->ktail != ktail) {
>               /*
> -              * First write barrier ensures that the SQE stores are updated
> -              * with the tail update. This is needed so that the kernel
> -              * will never see a tail update without the preceeding sQE
> -              * stores being done.
> +              * Use smp_store_release() so that the kernel will never see a
> +              * tail update without the preceding sQE stores being done.
>                */
> -             write_barrier();
> -             *sq->ktail = ktail;
> -             /*
> -              * The kernel has the matching read barrier for reading the
> -              * SQ tail.
> -              */
> -             write_barrier();
> +             smp_store_release(sq->ktail, ktail);
>       }
>  
>  submit:
> -- 
> 2.23.0.rc1.153.gdeed80330f-goog
> 

Attachment: signature.asc
Description: PGP signature

Reply via email to