Per the Linux Kernel Memory Model, value-returning atomic RMW operations
must provide sequentially consistent ordering (a full memory barrier). On
LoongArch, plain AMO instructions and bare ll/sc loops do not satisfy this
requirement by themselves.
Update emit_atomic_rmw() to emit barrier-carrying instructions for all
value-returning BPF atomics:
- BPF_FETCH: use amadd_db.{b,h,w,d} for ADD and am{and,or,xor}_db.{w,d}
  for AND/OR/XOR
- BPF_XCHG: use amswap_db.{b,h,w,d}
- BPF_CMPXCHG: emit dbar 0x700 after the ll/sc loop, matching
__WEAK_LLSC_MB in cmpxchg.h
Add the amswap_db.{b,h} and amadd_db.{b,h} opcode encodings, plus emit
helpers for all am*_db instructions used here, to inst.h.
Non-value-returning RMW ops (plain BPF_ADD, BPF_AND, etc.) are left as
weakly ordered, consistent with LKMM.
Signed-off-by: Chenguang Zhao <[email protected]>
---
arch/loongarch/include/asm/inst.h | 18 +++++++++++++++++
arch/loongarch/net/bpf_jit.c | 32 +++++++++++++++++--------------
2 files changed, 36 insertions(+), 14 deletions(-)
diff --git a/arch/loongarch/include/asm/inst.h
b/arch/loongarch/include/asm/inst.h
index 76b723590023..bdbc17d07110 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -199,6 +199,10 @@ enum reg3_op {
amswaph_op = 0x70b9,
amaddb_op = 0x70ba,
amaddh_op = 0x70bb,
+ amswapdbb_op = 0x70bc,
+ amswapdbh_op = 0x70bd,
+ amadddbb_op = 0x70be,
+ amadddbh_op = 0x70bf,
amswapw_op = 0x70c0,
amswapd_op = 0x70c1,
amaddw_op = 0x70c2,
@@ -783,6 +787,20 @@ DEF_EMIT_REG3_FORMAT(amswapb, amswapb_op)
DEF_EMIT_REG3_FORMAT(amswaph, amswaph_op)
DEF_EMIT_REG3_FORMAT(amswapw, amswapw_op)
DEF_EMIT_REG3_FORMAT(amswapd, amswapd_op)
+DEF_EMIT_REG3_FORMAT(amswapdbb, amswapdbb_op)
+DEF_EMIT_REG3_FORMAT(amswapdbh, amswapdbh_op)
+DEF_EMIT_REG3_FORMAT(amadddbb, amadddbb_op)
+DEF_EMIT_REG3_FORMAT(amadddbh, amadddbh_op)
+DEF_EMIT_REG3_FORMAT(amadddbw, amadddbw_op)
+DEF_EMIT_REG3_FORMAT(amadddbd, amadddbd_op)
+DEF_EMIT_REG3_FORMAT(amanddbw, amanddbw_op)
+DEF_EMIT_REG3_FORMAT(amanddbd, amanddbd_op)
+DEF_EMIT_REG3_FORMAT(amordbw, amordbw_op)
+DEF_EMIT_REG3_FORMAT(amordbd, amordbd_op)
+DEF_EMIT_REG3_FORMAT(amxordbw, amxordbw_op)
+DEF_EMIT_REG3_FORMAT(amxordbd, amxordbd_op)
+DEF_EMIT_REG3_FORMAT(amswapdbw, amswapdbw_op)
+DEF_EMIT_REG3_FORMAT(amswapdbd, amswapdbd_op)
#define DEF_EMIT_REG3SA2_FORMAT(NAME, OP) \
static inline void emit_##NAME(union loongarch_instruction *insn, \
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 24913dc7f4e8..47707579e61c 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -7,6 +7,9 @@
#include <linux/memory.h>
#include "bpf_jit.h"
+/* dbar hint for ll/sc completion ordering, see __WEAK_LLSC_MB */
+#define DBAR_LLSC_MB 0x700
+
#define LOONGARCH_MAX_REG_ARGS 8
#define LOONGARCH_LONG_JUMP_NINSNS 5
@@ -418,7 +421,7 @@ static int emit_atomic_rmw(const struct bpf_insn *insn,
struct jit_ctx *ctx)
pr_err_once("bpf-jit: amadd.b instruction is
not supported\n");
return -EINVAL;
}
- emit_insn(ctx, amaddb, src, t1, t3);
+ emit_insn(ctx, amadddbb, src, t1, t3);
emit_zext_32(ctx, src, true);
break;
case BPF_H:
@@ -426,39 +429,39 @@ static int emit_atomic_rmw(const struct bpf_insn *insn,
struct jit_ctx *ctx)
pr_err_once("bpf-jit: amadd.h instruction is
not supported\n");
return -EINVAL;
}
- emit_insn(ctx, amaddh, src, t1, t3);
+ emit_insn(ctx, amadddbh, src, t1, t3);
emit_zext_32(ctx, src, true);
break;
case BPF_W:
- emit_insn(ctx, amaddw, src, t1, t3);
+ emit_insn(ctx, amadddbw, src, t1, t3);
emit_zext_32(ctx, src, true);
break;
case BPF_DW:
- emit_insn(ctx, amaddd, src, t1, t3);
+ emit_insn(ctx, amadddbd, src, t1, t3);
break;
}
break;
case BPF_AND | BPF_FETCH:
if (isdw) {
- emit_insn(ctx, amandd, src, t1, t3);
+ emit_insn(ctx, amanddbd, src, t1, t3);
} else {
- emit_insn(ctx, amandw, src, t1, t3);
+ emit_insn(ctx, amanddbw, src, t1, t3);
emit_zext_32(ctx, src, true);
}
break;
case BPF_OR | BPF_FETCH:
if (isdw) {
- emit_insn(ctx, amord, src, t1, t3);
+ emit_insn(ctx, amordbd, src, t1, t3);
} else {
- emit_insn(ctx, amorw, src, t1, t3);
+ emit_insn(ctx, amordbw, src, t1, t3);
emit_zext_32(ctx, src, true);
}
break;
case BPF_XOR | BPF_FETCH:
if (isdw) {
- emit_insn(ctx, amxord, src, t1, t3);
+ emit_insn(ctx, amxordbd, src, t1, t3);
} else {
- emit_insn(ctx, amxorw, src, t1, t3);
+ emit_insn(ctx, amxordbw, src, t1, t3);
emit_zext_32(ctx, src, true);
}
break;
@@ -470,7 +473,7 @@ static int emit_atomic_rmw(const struct bpf_insn *insn,
struct jit_ctx *ctx)
pr_err_once("bpf-jit: amswap.b instruction is
not supported\n");
return -EINVAL;
}
- emit_insn(ctx, amswapb, src, t1, t3);
+ emit_insn(ctx, amswapdbb, src, t1, t3);
emit_zext_32(ctx, src, true);
break;
case BPF_H:
@@ -478,15 +481,15 @@ static int emit_atomic_rmw(const struct bpf_insn *insn,
struct jit_ctx *ctx)
pr_err_once("bpf-jit: amswap.h instruction is
not supported\n");
return -EINVAL;
}
- emit_insn(ctx, amswaph, src, t1, t3);
+ emit_insn(ctx, amswapdbh, src, t1, t3);
emit_zext_32(ctx, src, true);
break;
case BPF_W:
- emit_insn(ctx, amswapw, src, t1, t3);
+ emit_insn(ctx, amswapdbw, src, t1, t3);
emit_zext_32(ctx, src, true);
break;
case BPF_DW:
- emit_insn(ctx, amswapd, src, t1, t3);
+ emit_insn(ctx, amswapdbd, src, t1, t3);
break;
}
break;
@@ -509,6 +512,7 @@ static int emit_atomic_rmw(const struct bpf_insn *insn,
struct jit_ctx *ctx)
emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
emit_zext_32(ctx, r0, true);
}
+ emit_insn(ctx, dbar, DBAR_LLSC_MB);
break;
default:
pr_err_once("bpf-jit: invalid atomic read-modify-write opcode
%02x\n", imm);
--
2.25.1