The number of slots per page is 511, i.e. not a power of two, so 2^32
is not a multiple of it. Consequently, when the (32-bit) read and write
indexes wrap around at 2^32, the slot number derived from them (index
modulo 511) no longer advances contiguously, which will likely disrupt
operation. The hypervisor side gets I/O req server creation extended so
that we can indicate that we're using suitable atomic accesses where
needed (not all accesses to the two pointers really need to be atomic),
allowing it to atomically canonicalize both pointers once both have
gone through at least one cycle.

The Xen side counterpart (which is not a functional prerequisite for
this change, although the intention is for Xen to assume that default
servers always use suitable atomic accesses) can be found at e.g.
http://lists.xenproject.org/archives/html/xen-devel/2015-06/msg02996.html

Signed-off-by: Jan Beulich <jbeul...@suse.com>

--- a/i386-dm/helper2.c
+++ b/i386-dm/helper2.c
@@ -493,10 +493,19 @@ static int __handle_buffered_iopage(CPUS
 
     memset(&req, 0x00, sizeof(req));
 
-    while (buffered_io_page->read_pointer !=
-           buffered_io_page->write_pointer) {
-        buf_req = &buffered_io_page->buf_ioreq[
-            buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM];
+    for (;;) {
+        uint32_t rdptr = buffered_io_page->read_pointer, wrptr;
+
+        xen_rmb();
+        wrptr = buffered_io_page->write_pointer;
+        xen_rmb();
+        if (rdptr != buffered_io_page->read_pointer) {
+            continue;
+        }
+        if (rdptr == wrptr) {
+            break;
+        }
+        buf_req = &buffered_io_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
         req.size = 1UL << buf_req->size;
         req.count = 1;
         req.addr = buf_req->addr;
@@ -508,15 +517,14 @@ static int __handle_buffered_iopage(CPUS
         req.data_is_ptr = 0;
         qw = (req.size == 8);
         if (qw) {
-            buf_req = &buffered_io_page->buf_ioreq[
-                (buffered_io_page->read_pointer+1) % IOREQ_BUFFER_SLOT_NUM];
+            buf_req = &buffered_io_page->buf_ioreq[(rdptr + 1) %
+                                                   IOREQ_BUFFER_SLOT_NUM];
             req.data |= ((uint64_t)buf_req->data) << 32;
         }
 
         __handle_ioreq(env, &req);
 
-        xen_mb();
-        buffered_io_page->read_pointer += qw ? 2 : 1;
+        __sync_fetch_and_add(&buffered_io_page->read_pointer, qw + 1);
     }
 
     return req.count;



HVM: atomically access pointers in bufioreq handling

The number of slots per page is 511, i.e. not a power of two, so 2^32
is not a multiple of it. Consequently, when the (32-bit) read and write
indexes wrap around at 2^32, the slot number derived from them (index
modulo 511) no longer advances contiguously, which will likely disrupt
operation. The hypervisor side gets I/O req server creation extended so
that we can indicate that we're using suitable atomic accesses where
needed (not all accesses to the two pointers really need to be atomic),
allowing it to atomically canonicalize both pointers once both have
gone through at least one cycle.

The Xen side counterpart (which is not a functional prerequisite for
this change, although the intention is for Xen to assume that default
servers always use suitable atomic accesses) can be found at e.g.
http://lists.xenproject.org/archives/html/xen-devel/2015-06/msg02996.html

Signed-off-by: Jan Beulich <jbeul...@suse.com>

--- a/i386-dm/helper2.c
+++ b/i386-dm/helper2.c
@@ -493,10 +493,19 @@ static int __handle_buffered_iopage(CPUS
 
     memset(&req, 0x00, sizeof(req));
 
-    while (buffered_io_page->read_pointer !=
-           buffered_io_page->write_pointer) {
-        buf_req = &buffered_io_page->buf_ioreq[
-            buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM];
+    for (;;) {
+        uint32_t rdptr = buffered_io_page->read_pointer, wrptr;
+
+        xen_rmb();
+        wrptr = buffered_io_page->write_pointer;
+        xen_rmb();
+        if (rdptr != buffered_io_page->read_pointer) {
+            continue;
+        }
+        if (rdptr == wrptr) {
+            break;
+        }
+        buf_req = &buffered_io_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
         req.size = 1UL << buf_req->size;
         req.count = 1;
         req.addr = buf_req->addr;
@@ -508,15 +517,14 @@ static int __handle_buffered_iopage(CPUS
         req.data_is_ptr = 0;
         qw = (req.size == 8);
         if (qw) {
-            buf_req = &buffered_io_page->buf_ioreq[
-                (buffered_io_page->read_pointer+1) % IOREQ_BUFFER_SLOT_NUM];
+            buf_req = &buffered_io_page->buf_ioreq[(rdptr + 1) %
+                                                   IOREQ_BUFFER_SLOT_NUM];
             req.data |= ((uint64_t)buf_req->data) << 32;
         }
 
         __handle_ioreq(env, &req);
 
-        xen_mb();
-        buffered_io_page->read_pointer += qw ? 2 : 1;
+        __sync_fetch_and_add(&buffered_io_page->read_pointer, qw + 1);
     }
 
     return req.count;
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Reply via email to