Module Name:	src
Committed By:	riastradh
Date:		Fri Nov 29 22:17:24 UTC 2019
Modified Files:
	src/distrib/sets/lists/comp: mi
	src/share/man/man9: Makefile
	src/sys/sys: atomic.h
Added Files:
	src/share/man/man9: atomic_loadstore.9

Log Message:
New atomic load/store operations for the kernel.  Guarantee no fusing
and no tearing, and can optionally impose ordering relative to other
memory operations.

Unordered:
- atomic_load_relaxed
- atomic_store_relaxed

Ordered:
- atomic_load_acquire
- atomic_load_consume
- atomic_store_release

These are intended to match C11 semantics, and can be defined in terms
of the C11 atomic API when ready.

To generate a diff of this commit:
cvs rdiff -u -r1.2293 -r1.2294 src/distrib/sets/lists/comp/mi
cvs rdiff -u -r1.442 -r1.443 src/share/man/man9/Makefile
cvs rdiff -u -r0 -r1.1 src/share/man/man9/atomic_loadstore.9
cvs rdiff -u -r1.17 -r1.18 src/sys/sys/atomic.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
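As a quick illustration of the intended usage (an informal sketch, not
part of the commit; the "widget" structure and function names are
hypothetical), a writer initializes an object with plain stores and
publishes it with atomic_store_release(), and a reader picks it up with
atomic_load_consume(), which orders the dependent member loads after the
load of the pointer:

	#include <sys/atomic.h>
	#include <sys/kmem.h>

	struct widget {
		int	w_x, w_y;		/* hypothetical payload */
	};

	static struct widget *the_widget;	/* shared; initially NULL */

	static void
	widget_publish(void)
	{
		struct widget *w = kmem_alloc(sizeof(*w), KM_SLEEP);

		w->w_x = 12;		/* plain initializing stores... */
		w->w_y = 34;
		/* ...which all happen before the publishing store */
		atomic_store_release(&the_widget, w);
	}

	static int
	widget_read(void)
	{
		/* Dependent member loads below happen after this load. */
		struct widget *w = atomic_load_consume(&the_widget);

		if (w == NULL)
			return -1;	/* not yet published */
		return w->w_x + w->w_y;
	}

The unordered pair, atomic_load_relaxed()/atomic_store_relaxed(), covers
cases such as statistics counters where only freedom from fusing and
tearing is required; see the EXAMPLES section of the new man page below.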
Modified files: Index: src/distrib/sets/lists/comp/mi diff -u src/distrib/sets/lists/comp/mi:1.2293 src/distrib/sets/lists/comp/mi:1.2294 --- src/distrib/sets/lists/comp/mi:1.2293 Fri Nov 29 20:31:35 2019 +++ src/distrib/sets/lists/comp/mi Fri Nov 29 22:17:23 2019 @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.2293 2019/11/29 20:31:35 riastradh Exp $ +# $NetBSD: mi,v 1.2294 2019/11/29 22:17:23 riastradh Exp $ # # Note: don't delete entries from here - mark them as "obsolete" instead. ./etc/mtree/set.comp comp-sys-root @@ -10704,6 +10704,12 @@ ./usr/share/man/cat9/arp_ifinit.0 comp-sys-catman .cat ./usr/share/man/cat9/arpintr.0 comp-sys-catman .cat ./usr/share/man/cat9/arpresolve.0 comp-sys-catman .cat +./usr/share/man/cat9/atomic_load_acquire.0 comp-sys-catman .cat +./usr/share/man/cat9/atomic_load_consume.0 comp-sys-catman .cat +./usr/share/man/cat9/atomic_load_relaxed.0 comp-sys-catman .cat +./usr/share/man/cat9/atomic_loadstore.0 comp-sys-catman .cat +./usr/share/man/cat9/atomic_store_relaxed.0 comp-sys-catman .cat +./usr/share/man/cat9/atomic_store_release.0 comp-sys-catman .cat ./usr/share/man/cat9/atop.0 comp-sys-catman .cat ./usr/share/man/cat9/audio.0 comp-sys-catman .cat ./usr/share/man/cat9/audio_system.0 comp-sys-catman obsolete @@ -18652,6 +18658,12 @@ ./usr/share/man/html9/arp_ifinit.html comp-sys-htmlman html ./usr/share/man/html9/arpintr.html comp-sys-htmlman html ./usr/share/man/html9/arpresolve.html comp-sys-htmlman html +./usr/share/man/html9/atomic_load_acquire.html comp-sys-htmlman html +./usr/share/man/html9/atomic_load_consume.html comp-sys-htmlman html +./usr/share/man/html9/atomic_load_relaxed.html comp-sys-htmlman html +./usr/share/man/html9/atomic_loadstore.html comp-sys-htmlman html +./usr/share/man/html9/atomic_store_relaxed.html comp-sys-htmlman html +./usr/share/man/html9/atomic_store_release.html comp-sys-htmlman html ./usr/share/man/html9/atop.html comp-sys-htmlman html ./usr/share/man/html9/audio.html comp-sys-htmlman html ./usr/share/man/html9/audio_system.html comp-sys-htmlman obsolete @@ -26703,6 +26715,12 @@ ./usr/share/man/man9/arp_ifinit.9 comp-sys-man .man ./usr/share/man/man9/arpintr.9 comp-sys-man .man ./usr/share/man/man9/arpresolve.9 comp-sys-man .man +./usr/share/man/man9/atomic_load_acquire.9 comp-sys-man .man +./usr/share/man/man9/atomic_load_consume.9 comp-sys-man .man +./usr/share/man/man9/atomic_load_relaxed.9 comp-sys-man .man +./usr/share/man/man9/atomic_loadstore.9 comp-sys-man .man +./usr/share/man/man9/atomic_store_relaxed.9 comp-sys-man .man +./usr/share/man/man9/atomic_store_release.9 comp-sys-man .man ./usr/share/man/man9/atop.9 comp-sys-man .man ./usr/share/man/man9/audio.9 comp-sys-man .man ./usr/share/man/man9/audio_system.9 comp-sys-man obsolete Index: src/share/man/man9/Makefile diff -u src/share/man/man9/Makefile:1.442 src/share/man/man9/Makefile:1.443 --- src/share/man/man9/Makefile:1.442 Fri Nov 29 20:31:35 2019 +++ src/share/man/man9/Makefile Fri Nov 29 22:17:23 2019 @@ -1,4 +1,4 @@ -# $NetBSD: Makefile,v 1.442 2019/11/29 20:31:35 riastradh Exp $ +# $NetBSD: Makefile,v 1.443 2019/11/29 22:17:23 riastradh Exp $ # Makefile for section 9 (kernel function and variable) manual pages. 
@@ -67,6 +67,13 @@ MAN= accept_filter.9 accf_data.9 accf_ht wsbell.9 wscons.9 wsdisplay.9 wsfont.9 wskbd.9 wsmouse.9 \ xcall.9 +MAN+= atomic_loadstore.9 +MLINKS+=atomic_loadstore.9 atomic_load_acquire.9 \ + atomic_loadstore.9 atomic_load_consume.9 \ + atomic_loadstore.9 atomic_load_relaxed.9 \ + atomic_loadstore.9 atomic_store_relaxed.9 \ + atomic_loadstore.9 atomic_store_release.9 + MAN+= boothowto.9 MLINKS+=boothowto.9 BOOT_FLAG.9 Index: src/sys/sys/atomic.h diff -u src/sys/sys/atomic.h:1.17 src/sys/sys/atomic.h:1.18 --- src/sys/sys/atomic.h:1.17 Thu Nov 14 16:23:53 2019 +++ src/sys/sys/atomic.h Fri Nov 29 22:17:23 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: atomic.h,v 1.17 2019/11/14 16:23:53 maxv Exp $ */ +/* $NetBSD: atomic.h,v 1.18 2019/11/29 22:17:23 riastradh Exp $ */ /*- * Copyright (c) 2007, 2008 The NetBSD Foundation, Inc. @@ -383,4 +383,109 @@ __END_DECLS #define atomic_inc_64_nv kmsan_atomic_inc_64_nv #endif +#ifdef _KERNEL + +#if 1 // XXX: __STDC_VERSION__ < 201112L + +/* Pre-C11 definitions */ + +#include <sys/cdefs.h> +#include <sys/bitops.h> + +#ifdef _LP64 +#define __HAVE_ATOMIC64_LOADSTORE 1 +#define __ATOMIC_SIZE_MAX 8 +#else +#define __ATOMIC_SIZE_MAX 4 +#endif + +/* + * We assume that access to an aligned pointer to a volatile object of + * at most __ATOMIC_SIZE_MAX bytes is guaranteed to be atomic. This is + * an assumption that may be wrong, but we hope it won't be wrong + * before we just adopt the C11 atomic API. + */ +#define __ATOMIC_PTR_CHECK(p) do \ { \ CTASSERT(sizeof(*(p)) <= __ATOMIC_SIZE_MAX); \ KASSERT(((uintptr_t)(p) & (sizeof(*(p)) - 1)) == 0); \ } while (0) + +#define atomic_load_relaxed(p) \ +({ \ const volatile __typeof__(*(p)) *__al_ptr = (p); \ __ATOMIC_PTR_CHECK(__al_ptr); \ *__al_ptr; \ +}) + +#define atomic_load_consume(p) \ +({ \ const volatile __typeof__(*(p)) *__al_ptr = (p); \ __ATOMIC_PTR_CHECK(__al_ptr); \ __typeof__(*(p)) __al_val = *__al_ptr; \ membar_datadep_consumer(); \ __al_val; \ +}) + +/* + * We want {loads}-before-{loads,stores}. It is tempting to use + * membar_enter, but that provides {stores}-before-{loads,stores}, + * which may not help. So we must use membar_sync, which does the + * slightly stronger {loads,stores}-before-{loads,stores}. 
+ */ +#define atomic_load_acquire(p) \ +({ \ + const volatile __typeof__(*(p)) *__al_ptr = (p); \ + __ATOMIC_PTR_CHECK(__al_ptr); \ + __typeof__(*(p)) __al_val = *__al_ptr; \ + membar_sync(); \ + __al_val; \ +}) + +#define atomic_store_relaxed(p,v) \ +({ \ + volatile __typeof__(*(p)) *__as_ptr = (p); \ + __ATOMIC_PTR_CHECK(__as_ptr); \ + *__as_ptr = (v); \ +}) + +#define atomic_store_release(p,v) \ +({ \ + volatile __typeof__(*(p)) *__as_ptr = (p); \ + __typeof__(*(p)) __as_val = (v); \ + __ATOMIC_PTR_CHECK(__as_ptr); \ + membar_exit(); \ + *__as_ptr = __as_val; \ +}) + +#else /* __STDC_VERSION__ >= 201112L */ + +/* C11 definitions, not yet available */ + +#include <stdatomic.h> + +#define atomic_load_relaxed(p) \ + atomic_load_explicit((p), memory_order_relaxed) +#if 0 /* memory_order_consume is not there yet */ +#define atomic_load_consume(p) \ + atomic_load_explicit((p), memory_order_consume) +#else +#define atomic_load_consume(p) \ +({ \ + const __typeof__(*(p)) __al_val = atomic_load_relaxed(p); \ + membar_datadep_consumer(); \ + __al_val; \ +}) +#endif +#define atomic_load_acquire(p) \ + atomic_load_explicit((p), memory_order_acquire) +#define atomic_store_relaxed(p, v) \ + atomic_store_explicit((p), (v), memory_order_relaxed) +#define atomic_store_release(p, v) \ + atomic_store_explicit((p), (v), memory_order_release) + +#endif /* __STDC_VERSION__ */ + +#endif /* _KERNEL */ + #endif /* ! _SYS_ATOMIC_H_ */ Added files: Index: src/share/man/man9/atomic_loadstore.9 diff -u /dev/null src/share/man/man9/atomic_loadstore.9:1.1 --- /dev/null Fri Nov 29 22:17:24 2019 +++ src/share/man/man9/atomic_loadstore.9 Fri Nov 29 22:17:23 2019 @@ -0,0 +1,744 @@ +.\" $NetBSD: atomic_loadstore.9,v 1.1 2019/11/29 22:17:23 riastradh Exp $ +.\" +.\" Copyright (c) 2019 The NetBSD Foundation +.\" All rights reserved. +.\" +.\" This code is derived from software contributed to The NetBSD Foundation +.\" by Taylor R. Campbell. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. 
+.\" +.Dd November 25, 2019 +.Dt ATOMIC_LOADSTORE 9 +.Os +.Sh NAME +.Nm atomic_load_relaxed , +.Nm atomic_load_acquire , +.Nm atomic_load_consume , +.Nm atomic_store_relaxed , +.Nm atomic_store_release +.Nd atomic and ordered memory operations +.Sh SYNOPSIS +.In sys/atomic.h +.Ft T +.Fn atomic_load_relaxed "const volatile T *p" +.Ft T +.Fn atomic_load_acquire "const volatile T *p" +.Ft T +.Fn atomic_load_consume "const volatile T *p" +.Ft void +.Fn atomic_store_relaxed "volatile T *p" "T v" +.Ft void +.Fn atomic_store_release "volatile T *p" "T v" +.Sh DESCRIPTION +These type-generic macros implement memory operations that are +.Em atomic +and that have +.Em memory ordering constraints . +Aside from atomicity and ordering, the load operations are equivalent +to +.Li * Ns Fa p +and the store operations are equivalent to +.Li * Ns Fa p Li "=" Fa v . +The pointer +.Fa p +must be aligned, even on architectures like x86 which generally lack +strict alignment requirements; see +.Sx SIZE AND ALIGNMENT +for details. +.Pp +.Em Atomic +means that the memory operations cannot be +.Em fused +or +.Em torn : +.Bl -bullet +.It +.Em Fusing +is combining multiple memory operations on a single object into one +memory operation, such as replacing +.Bd -literal -compact + *p = v; + x = *p; +.Ed +by +.Bd -literal -compact + *p = v; + x = v; +.Ed +since the compiler can prove that +.Li * Ns Fa p +will yield +.Fa v +after +.Li * Ns Fa p Li = Fa v . +For +.Em atomic +memory operations, the implementation +.Em will not +assume that +.Bl -dash -compact +.It +consecutive loads of the same object will return the same value, or +.It +a store followed by a load of the same object will return the value +stored, or +.It +consecutive stores of the same object are redundant. +.El +Thus, the implementation will not replace two consecutive atomic loads +by one, will not elide an atomic load following a store, and will not +combine two consecutive atomic stores into one. +.Pp +For example, +.Bd -literal + atomic_store_relaxed(&flag, 1); + while (atomic_load_relaxed(&flag)) + continue; +.Ed +.Pp +may be used to set a flag and then busy-wait until another thread +clears it, whereas +.Bd -literal + flag = 1; + while (flag) + continue; +.Ed +.Pp +may be transformed into the infinite loop +.Bd -literal + flag = 1; + while (1) + continue; +.Ed +.It +.Em Tearing +is implementing a memory operation on a large data unit such as a +32-bit word by issuing multiple memory operations on smaller data units +such as 8-bit bytes. +The implementation will not tear +.Em atomic +loads or stores into smaller ones. +Thus, as far as any interrupt, other thread, or other CPU can tell, an +atomic memory operation is issued either all at once or not at all. +.Pp +For example, if a 32-bit word +.Fa w +is written with +.Li atomic_store_relaxed(& Ns Fa w Ns Li "," 0x00010002) , +then an interrupt, other thread, or other CPU reading it with +.Li atomic_load_relaxed(& Ns Fa w Ns Li ")" +will never witness it partially written, whereas +.Fa w Li = 0x00010002 +might be compiled into a pair of separate 16-bit store instructions +instead of one single word-sized store instruction, in which case other +threads may see the intermediate state with only one of the halves +written. +.El +.Pp +Atomic operations on any single object occur in a total order shared by +all interrupts, threads, and CPUs, which is consistent with the program +order in every interrupt, thread, and CPU. 
+A single program without interruption or other threads or CPUs will +always observe its own loads and stores in program order, but another +program in an interrupt handler, in another thread, or on another CPU +may issue loads that return values as if the first program's stores +occurred out of program order, and vice versa. +Two different threads might each observe a third thread's memory +operations in different orders. +.Pp +The +.Em memory ordering constraints +make limited guarantees of ordering relative to memory operations on +.Em other +objects as witnessed by interrupts, other threads, or other CPUs, and +have the following meanings: +.Bl -tag -width relaxed +.It relaxed +No ordering relative to memory operations on any other objects is +guaranteed. +Relaxed ordering is the default for ordinary non-atomic memory +operations like +.Li * Ns Fa p +and +.Li * Ns Fa p Li = Fa v . +.Pp +Atomic operations with relaxed ordering are cheap: they are not +read/modify/write atomic operations, and they do not involve any kind +of inter-CPU ordering barriers. +.It acquire +This memory operation happens before all subsequent memory operations +in program order. +However, prior memory operations in program order may be reordered to +happen after this one. +For example, assuming no aliasing between the pointers, the +implementation is allowed to treat +.Bd -literal + int x = *p; + if (atomic_load_acquire(q)) { + int y = *r; + *s = x + y; + return 1; + } +.Ed +.Pp +as if it were +.Bd -literal + if (atomic_load_acquire(q)) { + int x = *p; + int y = *r; + *s = x + y; + return 1; + } +.Ed +.Pp +but +.Em not +as if it were +.Bd -literal + int x = *p; + int y = *r; + *s = x + y; + if (atomic_load_acquire(q)) { + return 1; + } +.Ed +.It consume +This memory operation happens before all memory operations on objects +at addresses that are computed from the value returned by this one. +Otherwise, no ordering relative to memory operations on other objects +is implied. +.Pp +For example, the implementation is allowed to treat +.Bd -literal + struct foo *foo0, *foo1; + + struct foo *f0 = atomic_load_consume(&foo0); + struct foo *f1 = atomic_load_consume(&foo1); + int x = f0->x; + int y = f1->y; +.Ed +.Pp +as if it were +.Bd -literal + struct foo *foo0, *foo1; + + struct foo *f1 = atomic_load_consume(&foo1); + struct foo *f0 = atomic_load_consume(&foo0); + int y = f1->y; + int x = f0->x; +.Ed +.Pp +but loading +.Li f0->x +is guaranteed to happen after loading +.Li foo0 +even if the CPU had a cached value for the address that +.Li f0->x +happened to be at, and likewise for +.Li f1->y +and +.Li foo1 . +.Pp +.Fn atomic_load_consume +functions like +.Fn atomic_load_acquire +as long as the memory operations that must happen after it are limited +to addresses that depend on the value returned by it, but it is almost +always as cheap as +.Fn atomic_load_relaxed . +See +.Sx ACQUIRE OR CONSUME? +below for more details. +.It release +All prior memory operations in program order happen before this one. +However, subsequent memory operations in program order may be reordered +to happen before this one too. 
+For example, assuming no aliasing between the pointers, the +implementation is allowed to treat +.Bd -literal + int x = *p; + *q = x; + atomic_store_release(r, 0); + int y = *s; + return x + y; +.Ed +.Pp +as if it were +.Bd -literal + int y = *s; + int x = *p; + *q = x; + atomic_store_release(r, 0); + return x + y; +.Ed +.Pp +but +.Em not +as if it were +.Bd -literal + atomic_store_release(r, 0); + int x = *p; + int y = *s; + *q = x; + return x + y; +.Ed +.El +.Ss PAIRING ORDERED MEMORY OPERATIONS +In general, each +.Fn atomic_store_release +.Em must +be paired with either +.Fn atomic_load_acquire +or +.Fn atomic_load_consume +in order to have an effect \(em it is only when a release operation +synchronizes with an acquire or consume operation that any ordering is +guaranteed between memory operations +.Em before +the release operation and memory operations +.Em after +the acquire/consume operation. +.Pp +For example, to set up an entry in a table and then mark the entry +ready, you should: +.Bl -enum +.It +Perform memory operations to initialize the data. +.Bd -literal + tab[i].x = ...; + tab[i].y = ...; +.Ed +.It +Issue +.Fn atomic_store_release +to mark it ready. +.Bd -literal + atomic_store_release(&tab[i].ready, 1); +.Ed +.It +Possibly in another thread, issue +.Fn atomic_load_acquire +to ascertain whether it is ready. +.Bd -literal + if (atomic_load_acquire(&tab[i].ready) == 0) + return EWOULDBLOCK; +.Ed +.It +Perform memory operations to use the data. +.Bd -literal + do_stuff(tab[i].x, tab[i].y); +.Ed +.El +.Pp +Similarly, if you want to create an object, initialize it, and then +publish it to be used by another thread, then you should: +.Bl -enum +.It +Perform memory operations to initialize the object. +.Bd -literal + struct mumble *m = kmem_alloc(sizeof(*m), KM_SLEEP); + m->x = x; + m->y = y; + m->z = m->x + m->y; +.Ed +.It +Issue +.Fn atomic_store_release +to publish it. +.Bd -literal + atomic_store_release(&the_mumble, m); +.Ed +.It +Possibly in another thread, issue +.Fn atomic_load_consume +to get it. +.Bd -literal + struct mumble *m = atomic_load_consume(&the_mumble); +.Ed +.It +Perform memory operations to use the object's members. +.Bd -literal + m->y &= m->x; + do_things(m->x, m->y, m->z); +.Ed +.El +.Pp +In both examples, assuming that the value written by +.Fn atomic_store_release +in step\~2 +is read by +.Fn atomic_load_acquire +or +.Fn atomic_load_consume +in step\~3, this guarantees that all of the memory operations in +step\~1 complete before any of the memory operations in step\~4 \(em +even if they happen on different CPUs. +.Pp +Without +.Em both +the release operation in step\~2 +.Em and +the acquire or consume operation in step\~3, no ordering is guaranteed +between the memory operations in steps\~1 and\~4. +In fact, without +.Em both +release and acquire/consume, even the assignment +.Li m->z = m->x + m->y +in step\~1 might read values of +.Li m->x +and +.Li m->y +that were written in step\~4. +.Ss ACQUIRE OR CONSUME? +You must use +.Fn atomic_load_acquire +when subsequent memory operations in program order that must happen +after the load are on objects at +.Em addresses that might not depend arithmetically on the resulting value . 
+This applies particularly when the choice of whether to do the +subsequent memory operation depends on a +.Em control-flow decision based on the resulting value : +.Bd -literal + struct gadget { + int ready, x; + } the_gadget; + + /* Producer */ + the_gadget.x = 42; + atomic_store_release(&the_gadget.ready, 1); + + /* Consumer */ + if (atomic_load_acquire(&the_gadget.ready) == 0) + return EWOULDBLOCK; + int x = the_gadget.x; +.Ed +.Pp +Here the +.Em decision of whether to load +.Li the_gadget.x +depends on a control-flow decision depending on the value loaded from +.Li the_gadget.ready , +and loading +.Li the_gadget.x +must happen after loading +.Li the_gadget.ready . +Using +.Fn atomic_load_acquire +guarantees that the compiler and CPU do not conspire to load +.Li the_gadget.x +before we have ascertained that it is ready. +.Pp +You may use +.Fn atomic_load_consume +if all subsequent memory operations in program order that must happen +after the load are performed on objects at +.Em addresses computed arithmetically from the resulting value , +such as loading a pointer to a structure object and then dereferencing +it: +.Bd -literal + struct gizmo { + int x, y, z; + }; + struct gizmo null_gizmo; + struct gizmo *the_gizmo = &null_gizmo; + + /* Producer */ + struct gizmo *g = kmem_alloc(sizeof(*g), KM_SLEEP); + g->x = 12; + g->y = 34; + g->z = 56; + atomic_store_release(&the_gizmo, g); + + /* Consumer */ + struct gizmo *g = atomic_load_consume(&the_gizmo); + int y = g->y; +.Ed +.Pp +Here the +.Em address +of +.Li g->y +depends on the value of the pointer loaded from +.Li the_gizmo . +Using +.Fn atomic_load_consume +guarantees that we do not witness a stale cache for that address. +.Pp +In some cases it may be unclear. +For example: +.Bd -literal + int x[2]; + bool b; + + /* Producer */ + x[0] = 42; + atomic_store_release(&b, 0); + + /* Consumer 1 */ + int y = atomic_load_???(&b) ? x[0] : x[1]; + + /* Consumer 2 */ + int y = x[atomic_load_???(&b) ? 0 : 1]; + + /* Consumer 3 */ + int y = x[atomic_load_???(&b) ^ 1]; +.Ed +.Pp +Although the three consumers seem to be equivalent, by the letter of +C11 consumers\~1 and\~2 require +.Fn atomic_load_acquire +because the value determines the address of a subsequent load only via +control-flow decisions in the +.Li ?: +operator, whereas consumer\~3 can use +.Fn atomic_load_consume . +However, if you're not sure, you should err on the side of +.Fn atomic_load_acquire +until C11 implementations have ironed out the kinks in the semantics. +.Pp +On all CPUs other than DEC Alpha, +.Fn atomic_load_consume +is cheap \(em it is identical to +.Fn atomic_load_relaxed . +In contrast, +.Fn atomic_load_acquire +usually implies an expensive memory barrier. +.Ss SIZE AND ALIGNMENT +The pointer +.Fa p +must be aligned \(em that is, if the object it points to is +.No 2^ Ns Fa n +bytes long, then the low-order +.Fa n +bits of +.Fa p +must be zero. +.Pp +All +.Nx +ports support atomic loads and stores on units of data up to 32 bits. +Some ports additionally support atomic loads and stores on larger +quantities, like 64-bit quantities, if +.Dv __HAVE_ATOMIC64_LOADSTORE +is defined. +The macros are not allowed on larger quantities of data than the port +supports atomically; attempts to use them for such quantities should +result in a compile-time assertion failure. 
+.Pp +For example, as long as you use +.Fn atomic_store_* +to write a 32-bit quantity, you can safely use +.Fn atomic_load_relaxed +to optimistically read it outside a lock, but for a 64-bit quantity it +must be conditional on +.Dv __HAVE_ATOMIC64_LOADSTORE +\(em otherwise it will lead to compile-time errors on platforms without +64-bit atomic loads and stores: +.Bd -literal + struct foo { + kmutex_t f_lock; + uint32_t f_refcnt; + uint64_t f_ticket; + }; + + if (atomic_load_relaxed(&foo->f_refcnt) == 0) + return 123; +#ifdef __HAVE_ATOMIC64_LOADSTORE + if (atomic_load_relaxed(&foo->f_ticket) == ticket) + return 123; +#endif + mutex_enter(&foo->f_lock); + if (foo->f_refcnt == 0 || foo->f_ticket == ticket) + ret = 123; + ... +#ifdef __HAVE_ATOMIC64_LOADSTORE + atomic_store_relaxed(&foo->f_ticket, foo->f_ticket + 1); +#else + foo->f_ticket++; +#endif + ... + mutex_exit(&foo->f_lock); +.Ed +.Sh C11 COMPATIBILITY +These macros are meant to follow +.Tn C11 +semantics, in terms of +.Fn atomic_load_explicit +and +.Fn atomic_store_explicit +with the appropriate memory order specifiers, and are meant to make +future adoption of the +.Tn C11 +atomic API easier. +Eventually it may be mandatory to use the +.Tn C11 +.Vt _Atomic +type qualifier or equivalent for the operands. +.Sh LINUX ANALOGUES +The Linux kernel provides two macros +.Fn READ_ONCE x +and +.Fn WRITE_ONCE x v +which are similar to +.Li atomic_load_consume(& Ns Fa x Ns Li ")" +and +.Li atomic_store_relaxed(& Ns Fa x Ns Li "," Fa v Ns Li ")" , +respectively. +However, while Linux's +.Fn READ_ONCE +and +.Fn WRITE_ONCE +prevent fusing, they may in some cases be torn \(em and therefore fail +to guarantee atomicity \(em because: +.Bl -bullet +.It +They do not require the address +.Li & Ns Fa x +to be aligned. +.It +They do not require +.Li sizeof( Ns Fa x Ns Li ")" +to be at most the largest size of available atomic loads and stores on +the host architecture. +.El +.Sh EXAMPLES +Maintaining lossy counters. +These may lose some counts, because the read/modify/write cycle as a +whole is not atomic. +But this guarantees that the count will increase by at most one each +time. +In contrast, without atomic operations, in principle a write to a +32-bit counter might be torn into multiple smaller stores, which could +appear to happen out of order from another CPU's perspective, leading +to nonsensical counter readouts. +(For frequent events, consider using per-CPU counters instead in +practice.) +.Bd -literal + unsigned count; + + void + record_event(void) + { + atomic_store_relaxed(&count, + 1 + atomic_load_relaxed(&count)); + } + + unsigned + read_event_count(void) + { + + return atomic_load_relaxed(&count); + } +.Ed +.Pp +Initialization barrier. +.Bd -literal + int ready; + struct data d; + + void + setup_and_notify(void) + { + + setup_data(&d.things); + atomic_store_release(&ready, 1); + } + + void + try_if_ready(void) + { + + if (atomic_load_acquire(&ready)) + do_stuff(d.things); + } +.Ed +.Pp +Publishing a pointer to the current snapshot of data. +(Caller must arrange that only one call to take_snapshot happens at any +given time; generally this should be done in coordination with +.Xr pserialize 9 +or similar to enable resource reclamation.) 
+.Bd -literal + struct data *current_d; + + void + take_snapshot(void) + { + struct data *d = kmem_alloc(sizeof(*d), KM_SLEEP); + + d->things = ...; + + atomic_store_release(&current_d, d); + } + + struct data * + get_snapshot(void) + { + + return atomic_load_consume(&current_d); + } +.Ed +.Sh CODE REFERENCES +.Pa sys/sys/atomic.h +.Sh SEE ALSO +.Xr atomic_ops 3 , +.Xr membar_ops 3 , +.Xr pserialize 9 +.Sh HISTORY +These atomic operations first appeared in +.Nx 10.0 . +.Sh CAVEATS +C11 formally specifies that all subexpressions, except the left +operands of the +.Li "&&" , "||" , "?:" , +and +.Li "," +operators and the +.Fn kill_dependency +macro, carry dependencies for which +.Dv memory_order_consume +guarantees ordering, but most or all implementations to date simply +treat +.Dv memory_order_consume +as +.Dv memory_order_acquire +and do not take advantage of data dependencies to elide costly memory +barriers or load-acquire CPU instructions. +.Pp +Instead, we implement +.Fn atomic_load_consume +as +.Fn atomic_load_relaxed +followed by +.Xr membar_datadep_consumer 3 , +which is equivalent to +.Xr membar_consumer 3 +on DEC Alpha and +.Xr __insn_barrier 3 +elsewhere. +.Sh BUGS +Some idiot decided to call it +.Em tearing , +depriving us of the opportunity to say that atomic operations prevent +fusion and +.Em fission .
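As a supplementary note on the man page's SIZE AND ALIGNMENT rule (a
minimal standalone sketch; the helper name is made up for illustration):
for an object of 2^n bytes, the low-order n bits of its address must be
zero, i.e. the address masked with (size - 1) must be zero, which is the
same test that __ATOMIC_PTR_CHECK()'s KASSERT performs at run time:

	#include <stddef.h>
	#include <stdint.h>

	/*
	 * Return nonzero if p is suitably aligned for an atomic load or
	 * store of an object of the given power-of-two size: the
	 * low-order bits selected by the mask (size - 1) must be zero.
	 */
	static int
	aligned_for_atomic(const volatile void *p, size_t size)
	{
		return ((uintptr_t)p & (size - 1)) == 0;
	}

For example, aligned_for_atomic(&w, sizeof(w)) must hold for a 32-bit
word w before atomic_load_relaxed(&w) may be applied to it.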