Order head and tail stores properly against CQE / SQE memory accesses.
Use <asm/barrier.h> instead of the io_uring "barrier.h" header file.

Cc: Roman Penyaev <[email protected]>
Cc: Stefan Hajnoczi <[email protected]>
Signed-off-by: Bart Van Assche <[email protected]>
---
 tools/io_uring/Makefile         |  2 +-
 tools/io_uring/barrier.h        | 16 ---------------
 tools/io_uring/io_uring-bench.c | 20 +++++++++---------
 tools/io_uring/liburing.h       |  9 ++++-----
 tools/io_uring/queue.c          | 36 +++++++++++----------------------
 5 files changed, 26 insertions(+), 57 deletions(-)
 delete mode 100644 tools/io_uring/barrier.h

diff --git a/tools/io_uring/Makefile b/tools/io_uring/Makefile
index 00f146c54c53..bbec91c6274c 100644
--- a/tools/io_uring/Makefile
+++ b/tools/io_uring/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for io_uring test tools
-CFLAGS += -Wall -Wextra -g -D_GNU_SOURCE
+CFLAGS += -Wall -Wextra -g -D_GNU_SOURCE -I../include
 LDLIBS += -lpthread
 
 all: io_uring-cp io_uring-bench
diff --git a/tools/io_uring/barrier.h b/tools/io_uring/barrier.h
deleted file mode 100644
index ef00f6722ba9..000000000000
--- a/tools/io_uring/barrier.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef LIBURING_BARRIER_H
-#define LIBURING_BARRIER_H
-
-#if defined(__x86_64) || defined(__i386__)
-#define read_barrier() __asm__ __volatile__("":::"memory")
-#define write_barrier()        __asm__ __volatile__("":::"memory")
-#else
-/*
- * Add arch appropriate definitions. Be safe and use full barriers for
- * archs we don't have support for.
- */
-#define read_barrier() __sync_synchronize()
-#define write_barrier()        __sync_synchronize()
-#endif
-
-#endif
diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c
index 0f257139b003..29971a2a1c74 100644
--- a/tools/io_uring/io_uring-bench.c
+++ b/tools/io_uring/io_uring-bench.c
@@ -28,9 +28,9 @@
 #include <string.h>
 #include <pthread.h>
 #include <sched.h>
+#include <asm/barrier.h>
 
 #include "liburing.h"
-#include "barrier.h"
 
 #define min(a, b)              ((a < b) ? (a) : (b))
 
@@ -199,8 +199,7 @@ static int prep_more_ios(struct submitter *s, unsigned 
max_ios)
        next_tail = tail = *ring->tail;
        do {
                next_tail++;
-               read_barrier();
-               if (next_tail == *ring->head)
+               if (next_tail == smp_load_acquire(ring->head))
                        break;
 
                index = tail & sq_ring_mask;
@@ -211,10 +210,11 @@ static int prep_more_ios(struct submitter *s, unsigned 
max_ios)
        } while (prepped < max_ios);
 
        if (*ring->tail != tail) {
-               /* order tail store with writes to sqes above */
-               write_barrier();
-               *ring->tail = tail;
-               write_barrier();
+               /*
+                * Ensure that the kernel sees the SQE updates before it sees
+                * the tail update.
+                */
+               smp_store_release(ring->tail, tail);
        }
        return prepped;
 }
@@ -251,8 +251,7 @@ static int reap_events(struct submitter *s)
        do {
                struct file *f;
 
-               read_barrier();
-               if (head == *ring->tail)
+               if (head == smp_load_acquire(ring->tail))
                        break;
                cqe = &ring->cqes[head & cq_ring_mask];
                if (!do_nop) {
@@ -270,8 +269,7 @@ static int reap_events(struct submitter *s)
        } while (1);
 
        s->inflight -= reaped;
-       *ring->head = head;
-       write_barrier();
+       smp_store_release(ring->head, head);
        return reaped;
 }
 
diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h
index 5f305c86b892..15b29bfac811 100644
--- a/tools/io_uring/liburing.h
+++ b/tools/io_uring/liburing.h
@@ -10,7 +10,7 @@ extern "C" {
 #include <string.h>
 #include "../../include/uapi/linux/io_uring.h"
 #include <inttypes.h>
-#include "barrier.h"
+#include <asm/barrier.h>
 
 /*
  * Library interface to io_uring
@@ -82,12 +82,11 @@ static inline void io_uring_cqe_seen(struct io_uring *ring,
        if (cqe) {
                struct io_uring_cq *cq = &ring->cq;
 
-               (*cq->khead)++;
                /*
-                * Ensure that the kernel sees our new head, the kernel has
-                * the matching read barrier.
+                * Ensure that the kernel only sees the new value of the head
+                * index after the CQEs have been read.
                 */
-               write_barrier();
+               smp_store_release(cq->khead, *cq->khead + 1);
        }
 }
 
diff --git a/tools/io_uring/queue.c b/tools/io_uring/queue.c
index 321819c132c7..ada05bc74361 100644
--- a/tools/io_uring/queue.c
+++ b/tools/io_uring/queue.c
@@ -4,9 +4,9 @@
 #include <unistd.h>
 #include <errno.h>
 #include <string.h>
+#include <asm/barrier.h>
 
 #include "liburing.h"
-#include "barrier.h"
 
 static int __io_uring_get_cqe(struct io_uring *ring,
                              struct io_uring_cqe **cqe_ptr, int wait)
@@ -20,14 +20,12 @@ static int __io_uring_get_cqe(struct io_uring *ring,
        head = *cq->khead;
        do {
                /*
-                * It's necessary to use a read_barrier() before reading
-                * the CQ tail, since the kernel updates it locklessly. The
-                * kernel has the matching store barrier for the update. The
-                * kernel also ensures that previous stores to CQEs are ordered
+                * It's necessary to use smp_load_acquire() to read the CQ
+                * tail. The kernel uses smp_store_release() for updating the
+                * CQ tail to ensure that previous stores to CQEs are ordered
                 * with the tail update.
                 */
-               read_barrier();
-               if (head != *cq->ktail) {
+               if (head != smp_load_acquire(cq->ktail)) {
                        *cqe_ptr = &cq->cqes[head & mask];
                        break;
                }
@@ -73,12 +71,11 @@ int io_uring_submit(struct io_uring *ring)
        int ret;
 
        /*
-        * If we have pending IO in the kring, submit it first. We need a
-        * read barrier here to match the kernels store barrier when updating
-        * the SQ head.
+        * If we have pending IO in the kring, submit it first. We need
+        * smp_load_acquire() here to match the kernel's smp_store_release()
+        * when updating the SQ head.
         */
-       read_barrier();
-       if (*sq->khead != *sq->ktail) {
+       if (smp_load_acquire(sq->khead) != *sq->ktail) {
                submitted = *sq->kring_entries;
                goto submit;
        }
@@ -94,7 +91,6 @@ int io_uring_submit(struct io_uring *ring)
        to_submit = sq->sqe_tail - sq->sqe_head;
        while (to_submit--) {
                ktail_next++;
-               read_barrier();
 
                sq->array[ktail & mask] = sq->sqe_head & mask;
                ktail = ktail_next;
@@ -108,18 +104,10 @@ int io_uring_submit(struct io_uring *ring)
 
        if (*sq->ktail != ktail) {
                /*
-                * First write barrier ensures that the SQE stores are updated
-                * with the tail update. This is needed so that the kernel
-                * will never see a tail update without the preceeding sQE
-                * stores being done.
+                * Use smp_store_release() so that the kernel will never see a
+                * tail update without the preceding SQE stores being done.
                 */
-               write_barrier();
-               *sq->ktail = ktail;
-               /*
-                * The kernel has the matching read barrier for reading the
-                * SQ tail.
-                */
-               write_barrier();
+               smp_store_release(sq->ktail, ktail);
        }
 
 submit:
-- 
2.23.0.rc1.153.gdeed80330f-goog

Reply via email to