DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529272206
##########
src/linux/cgroups2.cpp:
##########
@@ -295,4 +296,152 @@ Try<set<string>> enabled(const string& cgroup)
} // namespace controllers {
+namespace devices {
+
+const int ALLOW_ACCESS = 1; // Verdict placed in r0 before exit to permit the device access.
+
+const int DENY_ACCESS = 0; // Verdict placed in r0 before exit to refuse the device access.
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+ DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+ { // Prologue: unpack the access description pointed to by r1 into r2-r5 so later blocks only compare registers.
+ program.append({
+ // r2: Type ('c', 'b', '?') - low 16 bits of the 32-bit word at offset 0.
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0xFFFF),
+ // r3: Access ('r', 'w', 'm') - high 16 bits of the same word, shifted down.
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+ BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+ // r4: Major device number - 32-bit word at offset 4.
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+ // r5: Minor device number - 32-bit word at offset 8.
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+ }
+
+ Try<Nothing> allow(const Entry entry) { return addDevice(entry, true); } // Appends a block that exits with ALLOW_ACCESS for accesses matching `entry`.
+
+ Try<Nothing> deny(const Entry entry) { return addDevice(entry, false); } // Appends a block that exits with DENY_ACCESS for accesses matching `entry`.
+
+ Try<Nothing> addDevice(const Entry entry, bool allow)
+ { // Appends one match-then-exit block for `entry`; `allow` picks the verdict. Always returns Nothing().
+ // We create a block of bytecode with the format:
+ // 1. [ Type Check ] ⤵
+ // 2. [ Access Check ] ⤵
+ // 3. [ Major Number Check ] ⤵
+ // 4. [ Minor Number Check ] ⤵
+ // 5. [ Allow/Deny Access ] ↓
+ // ↓
+ // 6. [ NEXT BLOCK ] ←←
+ //
+ // Either:
+ // 1. The device access is matched by (1,2,3,4) and the Allow/Deny access
+ // block (5) is executed.
+ // 2. One of (1,2,3,4) does not match the requested access and we skip
+ // to the next block (6).
+
+ // We only check attributes that are not 100% permissive:
+ // - Type != all
+ // - Access != rwm
+ // - Minor != None (AKA "any")
+ // - Major != None (AKA "any")
+ Entry::Selector selector = entry.selector;
+ Entry::Access access = entry.access;
+
+ bool check_type = selector.type != Entry::Selector::Type::ALL;
+ bool check_access = !access.mknod || !access.read || !access.write;
+ bool check_major = selector.major.isSome();
+ bool check_minor = selector.minor.isSome();
+
+ // Jump offset from the next emitted check to the [NEXT BLOCK]; offsets count
+ // from the instruction after the jump. The block holds 3 instructions for the
+ // access check, 1 per other check, and 2 for the verdict (MOV + EXIT), hence "1 +".
+ short jmp_size = 1 + (check_access ? 3 : 0) + (check_type ? 1 : 0) +
+ (check_major ? 1 : 0) + (check_minor ? 1 : 0);
+
+ // Check type (r2) against entry.
+ if (check_type) {
+ int bpf_type = selector.type == Entry::Selector::Type::BLOCK
+ ? BPF_DEVCG_DEV_BLOCK
+ : BPF_DEVCG_DEV_CHAR; // ALL is excluded by check_type; any other non-BLOCK type maps to CHAR.
+
+ program.append({ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, jmp_size) });
+ --jmp_size;
+ }
+ // Check access (r3) against entry: every requested bit must be in the entry's mask.
+ if (check_access) {
+ int bpf_access = 0;
+ bpf_access |= access.read ? BPF_DEVCG_ACC_READ : 0;
+ bpf_access |= access.write ? BPF_DEVCG_ACC_WRITE : 0;
+ bpf_access |= access.mknod ? BPF_DEVCG_ACC_MKNOD : 0;
+
+ program.append({
+ BPF_MOV32_REG (BPF_REG_1, BPF_REG_3), // r1 (reused as scratch) = requested access
+ BPF_ALU32_IMM (BPF_AND, BPF_REG_1, bpf_access), // r1 &= access bits this entry covers
+ BPF_JMP_REG (BPF_JNE, BPF_REG_1, BPF_REG_3, (short) (jmp_size -
2))}); // The jump is the 3rd instruction of this check, so its offset is 2 less than jmp_size.
+ jmp_size -= 3;
+ }
+ // Check major number (r4) against entry.
+ if (check_major) {
+ program.append({
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, (int) selector.major.get(),
jmp_size)});
+ --jmp_size;
+ }
+ // Check minor number (r5) against entry.
+ if (check_minor) {
+ program.append({
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_5, (int) selector.minor.get(),
jmp_size)});
+ --jmp_size;
+ }
+
+ // Allow/Deny access block: reached only when no check above jumped away.
+ program.append({
+ BPF_MOV64_IMM(BPF_REG_0, allow ? ALLOW_ACCESS : DENY_ACCESS),
+ BPF_EXIT_INSN()});
+
+ return Nothing();
+ }
+
+ ebpf::Program build()
+ {
+ // Exit instructions.
+ // If no entry granted access, then deny the access.
+ program.append({
+ BPF_MOV64_IMM (BPF_REG_0, DENY_ACCESS),
+ BPF_EXIT_INSN()});
+
+ return program;
Review Comment:
These instructions cover the case where no devices are added to a program. If we
_don't_ have them and you try to load a program without any device entries into
the kernel, it will cause a validation error. It's protective.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]