Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-22 Thread via GitHub


bmahler closed pull request #519: [cgroups2][ebpf] Configure device access 
permissions.
URL: https://github.com/apache/mesos/pull/519


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-20 Thread via GitHub


bmahler commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1532862960


##
src/linux/cgroups2.hpp:
##
@@ -75,6 +77,10 @@ Try move_process(const std::string& cgroup, pid_t 
pid);
 // /sys/fs/cgroup. E.g. For /sys/fs/cgroup/test, this will return "test".
 Try cgroup(pid_t pid);
 
+
+// Get the absolute of a cgroup. Assumes to cgroup exists.
+std::string path(const std::string& cgroup);

Review Comment:
   maybe add this in a different PR and replace all the existing path joins 
with this?



##
src/linux/cgroups2.cpp:
##
@@ -442,4 +449,204 @@ Try weight(const string& cgroup)
 
 } // namespace cpu {
 
+namespace devices {
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+  // The BPF_PROG_TYPE_CGROUP_DEVICE program takes in
+  // `struct bpf_cgroup_dev_ctx*` as input. We extract the fields into
+  // registers r2-5.
+  //
+  // The device type is encoded in the first 16 bits of `access_type` and
+  // the access type is encoded in the last 16 bits of `access_type`.
+  //
+  // r2: Type ('c', 'b', '?')
+  BPF_LDX_MEM(
+BPF_W, BPF_REG_2, BPF_REG_1, offsetof(bpf_cgroup_dev_ctx, 
access_type)),
+  BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+  // r3: Access ('r', 'w', 'm')
+  BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+offsetof(bpf_cgroup_dev_ctx, access_type)),
+  BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+  // r4: Major Version
+  BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+offsetof(bpf_cgroup_dev_ctx, major)),
+  // r5: Minor Version
+  BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+offsetof(bpf_cgroup_dev_ctx, minor)),
+});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)
+  {
+if (hasCatchAll) {
+  return Nothing();
+}
+// return Nothing();
+// We create a block of bytecode with the format:
+// 1. Major Version Check
+// 2. Minor Version Check
+// 3. Type Check
+// 4. Access Check
+// 5. Allow/Deny Access
+//
+// 6. NEXT BLOCK
+//
+// Either:
+// 1. The device access is matched by (1,2,3,4) and the Allow/Deny access
+//block (5) is executed.
+// 2. One of (1,2,3,4) does not match the requested access and we skip
+//to the next block (6).
+
+const Entry::Selector& selector = entry.selector;
+const Entry::Access& access = entry.access;
+
+// We only check attributes that are not 100% permissive:
+// - Major  != None (AKA "any")
+// - Minor  != None (AKA "any")
+// - Type   != all
+// - Access != rnw
+bool check_major = selector.major.isSome();
+bool check_minor = selector.minor.isSome();
+bool check_type = selector.type != Entry::Selector::Type::ALL;
+bool check_access = !access.mknod || !access.read || !access.write;
+
+// Number of instructions to the [NEXT BLOCK]. This is used if a check
+// fails (meaning this whitelist entry does not apply) and we want to
+// skip the subsequent checks.
+short jmp_size = 1 + (check_access ? 3 : 0) + (check_type ? 1 : 0) +
+ (check_major ? 1 : 0) + (check_minor ? 1 : 0);
+
+// Check major version (r4) against entry.
+if (check_major) {
+  program.append({
+BPF_JMP_IMM(BPF_JNE, BPF_REG_4, (int)selector.major.get(), jmp_size),
+  });
+  --jmp_size;
+}
+
+// Check minor version (r5) against entry.
+if (check_minor) {
+  program.append({
+BPF_JMP_IMM(BPF_JNE, BPF_REG_5, (int)selector.minor.get(), jmp_size),
+  });
+  --jmp_size;
+}
+
+// Check type (r2) against entry.
+if (check_type) {
+  int bpf_type = 0;
+
+  switch (selector.type) {
+case Entry::Selector::Type::BLOCK: {
+  bpf_type = BPF_DEVCG_DEV_BLOCK;
+  break;
+}
+case Entry::Selector::Type::CHARACTER: {
+  bpf_type = BPF_DEVCG_DEV_CHAR;
+  break;
+}
+case Entry::Selector::Type::ALL: {
+  // Won't be reached, since `check_type` is true.
+  break;
+}
+  }
+
+  program.append({
+BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, jmp_size),
+  });
+  --jmp_size;
+}
+
+// Check access (r3) against entry.
+if (check_access) {
+  int bpf_access = 0;
+  bpf_access |= access.read ? BPF_DEVCG_ACC_READ : 0;
+  bpf_access |= access.write ? BPF_DEVCG_ACC_WRITE : 0;
+  bpf_access |= access.mknod ? BPF_DEVCG_ACC_MKNOD : 0;
+
+  program.append({
+BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_1, bpf_access),
+

Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-20 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1532734374


##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});

Review Comment:
   Make this more clear.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-20 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1532660172


##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)
+  {
+// We create a block of bytecode with the format:
+// 1. [ Type Check  ] ⤵
+// 2. [ Access Check] ⤵

Review Comment:
   Reordered.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-19 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1530418511


##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)
+  {
+// We create a block of bytecode with the format:
+// 1. [ Type Check  ] ⤵
+// 2. [ Access Check] ⤵
+// 3. [ Minor Version Check ] ⤵
+// 4. [ Major Version Check ] ⤵
+// 5. [ Allow/Deny Access   ]  ↓
+// ↓
+// 6. [ NEXT BLOCK  ] ←←
+//
+// Either:
+// 1. The device access is matched by (1,2,3,4) and the Allow/Deny access
+//block (5) is executed.
+// 2. One of (1,2,3,4) does not match the requested access and we skip
+//to the next block (6).
+
+// We only check attributes that are not 100% permissive:
+// - Type   != all
+// - Access != rnw
+// - Minor  != None (AKA "any")
+// - Major  != None (AKA "any")
+Entry::Selector selector = entry.selector;
+Entry::Access access = entry.access;
+
+bool check_type = selector.type != Entry::Selector::Type::ALL;
+bool check_access = !access.mknod || !access.read || !access.write;
+bool check_major = selector.major.isSome();
+bool check_minor = selector.minor.isSome();
+
+// Number of instructions to the [NEXT BLOCK]. This is used if a check
+// fails (meaning this whitelist entry does not apply) and we want to
+// skip the subsequent checks.
+short jmp_size = 1 + (check_access ? 3 : 0) + (check_type ? 1 : 0) +
+ (check_major ? 1 : 0) + (check_minor ? 1 : 0);
+
+// Check type (r2) against entry.
+if (check_type) {
+  int bpf_type = selector.type == Entry::Selector::Type::BLOCK
+   ? BPF_DEVCG_DEV_BLOCK
+   : BPF_DEVCG_DEV_CHAR;
+
+  program.append({ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, jmp_size) });
+  --jmp_size;
+}
+// Check access (r3) against entry.
+if (check_access) {
+  int bpf_access = 0;
+  bpf_access |= access.read ? BPF_DEVCG_ACC_READ : 0;
+  bpf_access |= access.write ? BPF_DEVCG_ACC_WRITE : 0;
+  bpf_access |= access.mknod ? BPF_DEVCG_ACC_MKNOD : 0;
+
+ program.append({
+  BPF_MOV32_REG (BPF_REG_1, BPF_REG_3),
+  BPF_ALU32_IMM (BPF_AND, BPF_REG_1, bpf_access),
+  BPF_JMP_REG (BPF_JNE, BPF_REG_1, BPF_REG_3, (short) (jmp_size - 
2))});
+  jmp_size -= 3;
+}
+// Check major version (r4) against entry.
+if (check_major) {
+  program.append({
+  BPF_JMP_IMM(BPF_JNE, BPF_REG_4, (int) selector.major.get(), 
jmp_size)});
+  --jmp_size;
+}
+// Check minor version (r5) against entry.
+if (check_minor) {
+  program.append({
+  BPF_JMP_IMM(BPF_JNE, BPF_REG_5, (int) selector.minor.get(), 
jmp_size)});
+  --jmp_size;
+}
+
+// Allow/Deny access block.
+program.append({
+BPF_MOV64_IMM(BPF_REG_0, allow ? ALLOW_ACCESS : DENY_ACCESS),
+BPF_EXIT_INSN()});
+
+return Nothing();
+  }
+
+  ebpf::Program build()
+  {
+// Exit instructions.
+// If no entry granted access, then deny the access.
+program.append({
+BPF_MOV64_IMM (BPF_REG_0, DENY_ACCESS),
+BPF_EXIT_INSN()});
+
+return program;

Review Comment:
   Actually, reflecting, we reach this point just as described in the comment:
   ```
   // If no entry granted access, then deny the access.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-19 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1530377439


##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)

Review Comment:
   Actually, since it's only in the`*.cpp` file I'm keeping it public. I would 
have assumed that I could mark it public and still have access within 
`cgroups2.cpp`, but that doesn't look to be the case.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


bmahler commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529415877


##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)
+  {
+// We create a block of bytecode with the format:
+// 1. [ Type Check  ] ⤵
+// 2. [ Access Check] ⤵
+// 3. [ Minor Version Check ] ⤵
+// 4. [ Major Version Check ] ⤵
+// 5. [ Allow/Deny Access   ]  ↓
+// ↓
+// 6. [ NEXT BLOCK  ] ←←
+//
+// Either:
+// 1. The device access is matched by (1,2,3,4) and the Allow/Deny access
+//block (5) is executed.
+// 2. One of (1,2,3,4) does not match the requested access and we skip
+//to the next block (6).
+
+// We only check attributes that are not 100% permissive:
+// - Type   != all
+// - Access != rnw
+// - Minor  != None (AKA "any")
+// - Major  != None (AKA "any")
+Entry::Selector selector = entry.selector;
+Entry::Access access = entry.access;
+
+bool check_type = selector.type != Entry::Selector::Type::ALL;
+bool check_access = !access.mknod || !access.read || !access.write;
+bool check_major = selector.major.isSome();
+bool check_minor = selector.minor.isSome();
+
+// Number of instructions to the [NEXT BLOCK]. This is used if a check
+// fails (meaning this whitelist entry does not apply) and we want to
+// skip the subsequent checks.
+short jmp_size = 1 + (check_access ? 3 : 0) + (check_type ? 1 : 0) +
+ (check_major ? 1 : 0) + (check_minor ? 1 : 0);
+
+// Check type (r2) against entry.
+if (check_type) {
+  int bpf_type = selector.type == Entry::Selector::Type::BLOCK
+   ? BPF_DEVCG_DEV_BLOCK
+   : BPF_DEVCG_DEV_CHAR;
+
+  program.append({ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, jmp_size) });
+  --jmp_size;
+}
+// Check access (r3) against entry.
+if (check_access) {
+  int bpf_access = 0;
+  bpf_access |= access.read ? BPF_DEVCG_ACC_READ : 0;
+  bpf_access |= access.write ? BPF_DEVCG_ACC_WRITE : 0;
+  bpf_access |= access.mknod ? BPF_DEVCG_ACC_MKNOD : 0;
+
+ program.append({
+  BPF_MOV32_REG (BPF_REG_1, BPF_REG_3),
+  BPF_ALU32_IMM (BPF_AND, BPF_REG_1, bpf_access),
+  BPF_JMP_REG (BPF_JNE, BPF_REG_1, BPF_REG_3, (short) (jmp_size - 
2))});
+  jmp_size -= 3;
+}
+// Check major version (r4) against entry.
+if (check_major) {
+  program.append({
+  BPF_JMP_IMM(BPF_JNE, BPF_REG_4, (int) selector.major.get(), 
jmp_size)});
+  --jmp_size;
+}
+// Check minor version (r5) against entry.
+if (check_minor) {
+  program.append({
+  BPF_JMP_IMM(BPF_JNE, BPF_REG_5, (int) selector.minor.get(), 
jmp_size)});
+  --jmp_size;
+}
+
+// Allow/Deny access block.
+program.append({
+BPF_MOV64_IMM(BPF_REG_0, allow ? ALLOW_ACCESS : DENY_ACCESS),
+BPF_EXIT_INSN()});
+
+return Nothing();
+  }
+
+  ebpf::Program build()
+  {
+// Exit instructions.
+// If no entry granted access, then deny the access.
+program.append({
+BPF_MOV64_IMM (BPF_REG_0, DENY_ACCESS),
+BPF_EXIT_INSN()});
+
+return program;

Review Comment:
   ah ok, in that case, can we generate this only when needed, for clarity?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


bmahler commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529414026


##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)
+  {
+// We create a block of bytecode with the format:
+// 1. [ Type Check  ] ⤵
+// 2. [ Access Check] ⤵

Review Comment:
   I understand that regardless of ordering, these all need to be checked and 
we jump out if any don't match, but it seems more natural to reason about 
checking in the same order that we'd be writing the logic if we were to code 
the logic if we weren't generating assembly, in which case I would imagine that 
the code would be written to first examine if this is the right device, then 
check access. It also seems a bit easier to reason about the access part if it 
resides closer to the final access code?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


bmahler commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529406486


##
src/tests/containerizer/cgroups2_tests.cpp:
##
@@ -59,6 +63,30 @@ TEST_F(Cgroups2Test, ROOT_CGROUPS2_AvailableSubsystems)
   }
 }
 
+
+TEST_F(Cgroups2Test, ROOT_CGROUPS2_DeviceController)
+{
+  namespace devices = cgroups2::devices;
+
+  // Deny all access.
+  devices::Entry entry = devices::Entry::parse("a *:* rwm").get();
+
+  EXPECT_SOME(devices::configure(cgroups2::ROOT_CGROUP, {}, {entry}));
+  Try> attached = ebpf::cgroups2::attached(
+  cgroups2::ROOT_CGROUP);
+
+  ASSERT_SOME(attached);
+  EXPECT_EQ(1, (int) attached->size());
+
+  // Verify that we can't open /dev/null.
+  EXPECT_ERROR(os::open(os::DEV_NULL, O_RDWR));
+
+  EXPECT_SOME(ebpf::cgroups2::detach(cgroups2::ROOT_CGROUP, 
attached.get()[0]));

Review Comment:
   it would need to be `(*attached)[0]` due to operator precedence rules IIRC



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529272206


##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)
+  {
+// We create a block of bytecode with the format:
+// 1. [ Type Check  ] ⤵
+// 2. [ Access Check] ⤵
+// 3. [ Minor Version Check ] ⤵
+// 4. [ Major Version Check ] ⤵
+// 5. [ Allow/Deny Access   ]  ↓
+// ↓
+// 6. [ NEXT BLOCK  ] ←←
+//
+// Either:
+// 1. The device access is matched by (1,2,3,4) and the Allow/Deny access
+//block (5) is executed.
+// 2. One of (1,2,3,4) does not match the requested access and we skip
+//to the next block (6).
+
+// We only check attributes that are not 100% permissive:
+// - Type   != all
+// - Access != rnw
+// - Minor  != None (AKA "any")
+// - Major  != None (AKA "any")
+Entry::Selector selector = entry.selector;
+Entry::Access access = entry.access;
+
+bool check_type = selector.type != Entry::Selector::Type::ALL;
+bool check_access = !access.mknod || !access.read || !access.write;
+bool check_major = selector.major.isSome();
+bool check_minor = selector.minor.isSome();
+
+// Number of instructions to the [NEXT BLOCK]. This is used if a check
+// fails (meaning this whitelist entry does not apply) and we want to
+// skip the subsequent checks.
+short jmp_size = 1 + (check_access ? 3 : 0) + (check_type ? 1 : 0) +
+ (check_major ? 1 : 0) + (check_minor ? 1 : 0);
+
+// Check type (r2) against entry.
+if (check_type) {
+  int bpf_type = selector.type == Entry::Selector::Type::BLOCK
+   ? BPF_DEVCG_DEV_BLOCK
+   : BPF_DEVCG_DEV_CHAR;
+
+  program.append({ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, jmp_size) });
+  --jmp_size;
+}
+// Check access (r3) against entry.
+if (check_access) {
+  int bpf_access = 0;
+  bpf_access |= access.read ? BPF_DEVCG_ACC_READ : 0;
+  bpf_access |= access.write ? BPF_DEVCG_ACC_WRITE : 0;
+  bpf_access |= access.mknod ? BPF_DEVCG_ACC_MKNOD : 0;
+
+ program.append({
+  BPF_MOV32_REG (BPF_REG_1, BPF_REG_3),
+  BPF_ALU32_IMM (BPF_AND, BPF_REG_1, bpf_access),
+  BPF_JMP_REG (BPF_JNE, BPF_REG_1, BPF_REG_3, (short) (jmp_size - 
2))});
+  jmp_size -= 3;
+}
+// Check major version (r4) against entry.
+if (check_major) {
+  program.append({
+  BPF_JMP_IMM(BPF_JNE, BPF_REG_4, (int) selector.major.get(), 
jmp_size)});
+  --jmp_size;
+}
+// Check minor version (r5) against entry.
+if (check_minor) {
+  program.append({
+  BPF_JMP_IMM(BPF_JNE, BPF_REG_5, (int) selector.minor.get(), 
jmp_size)});
+  --jmp_size;
+}
+
+// Allow/Deny access block.
+program.append({
+BPF_MOV64_IMM(BPF_REG_0, allow ? ALLOW_ACCESS : DENY_ACCESS),
+BPF_EXIT_INSN()});
+
+return Nothing();
+  }
+
+  ebpf::Program build()
+  {
+// Exit instructions.
+// If no entry granted access, then deny the access.
+program.append({
+BPF_MOV64_IMM (BPF_REG_0, DENY_ACCESS),
+BPF_EXIT_INSN()});
+
+return program;

Review Comment:
   If no devices are added to a program. If we _don't_ have these instructions 
and you try and load the program without any device entries into the kernel, it 
will cause a validation error. It's protective.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529056843


##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)
+  {
+// We create a block of bytecode with the format:
+// 1. [ Type Check  ] ⤵
+// 2. [ Access Check] ⤵
+// 3. [ Minor Version Check ] ⤵
+// 4. [ Major Version Check ] ⤵
+// 5. [ Allow/Deny Access   ]  ↓
+// ↓
+// 6. [ NEXT BLOCK  ] ←←
+//
+// Either:
+// 1. The device access is matched by (1,2,3,4) and the Allow/Deny access
+//block (5) is executed.
+// 2. One of (1,2,3,4) does not match the requested access and we skip
+//to the next block (6).
+
+// We only check attributes that are not 100% permissive:
+// - Type   != all
+// - Access != rnw
+// - Minor  != None (AKA "any")
+// - Major  != None (AKA "any")

Review Comment:
   Will move the comment :+1:



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529056560


##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)
+  {
+// We create a block of bytecode with the format:
+// 1. [ Type Check  ] ⤵
+// 2. [ Access Check] ⤵

Review Comment:
   Since all of the checks need to pass, I considered the order to be somewhat 
irrelevant. Ordering is semantically the same, we'd just short-circuit. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529055012


##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)
+  {
+// We create a block of bytecode with the format:
+// 1. [ Type Check  ] ⤵
+// 2. [ Access Check] ⤵
+// 3. [ Minor Version Check ] ⤵
+// 4. [ Major Version Check ] ⤵
+// 5. [ Allow/Deny Access   ]  ↓
+// ↓
+// 6. [ NEXT BLOCK  ] ←←

Review Comment:
   The instruction _after_ the last instruction that is added by a call to 
`addDevice()`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529053611


##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)
+  {
+// We create a block of bytecode with the format:
+// 1. [ Type Check  ] ⤵
+// 2. [ Access Check] ⤵
+// 3. [ Minor Version Check ] ⤵
+// 4. [ Major Version Check ] ⤵
+// 5. [ Allow/Deny Access   ]  ↓
+// ↓
+// 6. [ NEXT BLOCK  ] ←←

Review Comment:
   Sure, I'll remove them.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529053200


##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)

Review Comment:
   `private` :+1:



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529051851


##
src/linux/cgroups2.hpp:
##
@@ -69,6 +71,20 @@ Try> enabled(const std::string& 
cgroup);
 
 } // namespace controllers {
 
+namespace devices {
+
+using cgroups::devices::Entry;
+
+// Configure the device access permissions for the cgroup. These permissions
+// are hierarchical. I.e. if a parent cgroup does not allow an access then
+// 'this' cgroup will be denied access.
+Try configure(
+const std::string& cgroup,
+const std::vector allow,
+const std::vector deny);

Review Comment:
   Will update to be `const vector&` :+1:



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529051271


##
src/tests/containerizer/cgroups2_tests.cpp:
##
@@ -59,6 +63,30 @@ TEST_F(Cgroups2Test, ROOT_CGROUPS2_AvailableSubsystems)
   }
 }
 
+
+TEST_F(Cgroups2Test, ROOT_CGROUPS2_DeviceController)
+{
+  namespace devices = cgroups2::devices;
+
+  // Deny all access.
+  devices::Entry entry = devices::Entry::parse("a *:* rwm").get();
+
+  EXPECT_SOME(devices::configure(cgroups2::ROOT_CGROUP, {}, {entry}));
+  Try> attached = ebpf::cgroups2::attached(
+  cgroups2::ROOT_CGROUP);
+
+  ASSERT_SOME(attached);
+  EXPECT_EQ(1, (int) attached->size());
+
+  // Verify that we can't open /dev/null.
+  EXPECT_ERROR(os::open(os::DEV_NULL, O_RDWR));
+
+  EXPECT_SOME(ebpf::cgroups2::detach(cgroups2::ROOT_CGROUP, 
attached.get()[0]));

Review Comment:
   Wasn't aware of `->at(0)`, will do that (`*attached[0]` doesn't work, 
unfortunately)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529050289


##
src/tests/containerizer/cgroups2_tests.cpp:
##
@@ -59,6 +63,30 @@ TEST_F(Cgroups2Test, ROOT_CGROUPS2_AvailableSubsystems)
   }
 }
 
+
+TEST_F(Cgroups2Test, ROOT_CGROUPS2_DeviceController)
+{
+  namespace devices = cgroups2::devices;
+
+  // Deny all access.
+  devices::Entry entry = devices::Entry::parse("a *:* rwm").get();
+
+  EXPECT_SOME(devices::configure(cgroups2::ROOT_CGROUP, {}, {entry}));
+  Try> attached = ebpf::cgroups2::attached(
+  cgroups2::ROOT_CGROUP);
+
+  ASSERT_SOME(attached);
+  EXPECT_EQ(1, (int) attached->size());
+
+  // Verify that we can't open /dev/null.
+  EXPECT_ERROR(os::open(os::DEV_NULL, O_RDWR));
+
+  EXPECT_SOME(ebpf::cgroups2::detach(cgroups2::ROOT_CGROUP, 
attached.get()[0]));

Review Comment:
   Will update to create a new cgroup for testing. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529049562


##
src/tests/containerizer/cgroups2_tests.cpp:
##
@@ -59,6 +63,30 @@ TEST_F(Cgroups2Test, ROOT_CGROUPS2_AvailableSubsystems)
   }
 }
 
+
+TEST_F(Cgroups2Test, ROOT_CGROUPS2_DeviceController)
+{
+  namespace devices = cgroups2::devices;
+
+  // Deny all access.
+  devices::Entry entry = devices::Entry::parse("a *:* rwm").get();
+
+  EXPECT_SOME(devices::configure(cgroups2::ROOT_CGROUP, {}, {entry}));
+  Try> attached = ebpf::cgroups2::attached(
+  cgroups2::ROOT_CGROUP);
+
+  ASSERT_SOME(attached);
+  EXPECT_EQ(1, (int) attached->size());

Review Comment:
   The error message it produces is not very telling, but yeah the cast is 
required. I'll use `1u` instead :+1:



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


DevinLeamy commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1529047359


##
src/tests/containerizer/cgroups2_tests.cpp:
##
@@ -59,6 +63,30 @@ TEST_F(Cgroups2Test, ROOT_CGROUPS2_AvailableSubsystems)
   }
 }
 
+
+TEST_F(Cgroups2Test, ROOT_CGROUPS2_DeviceController)
+{
+  namespace devices = cgroups2::devices;
+
+  // Deny all access.
+  devices::Entry entry = devices::Entry::parse("a *:* rwm").get();
+
+  EXPECT_SOME(devices::configure(cgroups2::ROOT_CGROUP, {}, {entry}));
+  Try> attached = ebpf::cgroups2::attached(
+  cgroups2::ROOT_CGROUP);

Review Comment:
   Agreed. Will do :+1:



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


bmahler commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1528991265


##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)
+  {
+// We create a block of bytecode with the format:
+// 1. [ Type Check  ] ⤵
+// 2. [ Access Check] ⤵
+// 3. [ Minor Version Check ] ⤵
+// 4. [ Major Version Check ] ⤵
+// 5. [ Allow/Deny Access   ]  ↓
+// ↓
+// 6. [ NEXT BLOCK  ] ←←
+//
+// Either:
+// 1. The device access is matched by (1,2,3,4) and the Allow/Deny access
+//block (5) is executed.
+// 2. One of (1,2,3,4) does not match the requested access and we skip
+//to the next block (6).
+
+// We only check attributes that are not 100% permissive:
+// - Type   != all
+// - Access != rnw
+// - Minor  != None (AKA "any")
+// - Major  != None (AKA "any")
+Entry::Selector selector = entry.selector;
+Entry::Access access = entry.access;
+
+bool check_type = selector.type != Entry::Selector::Type::ALL;
+bool check_access = !access.mknod || !access.read || !access.write;
+bool check_major = selector.major.isSome();
+bool check_minor = selector.minor.isSome();
+
+// Number of instructions to the [NEXT BLOCK]. This is used if a check
+// fails (meaning this whitelist entry does not apply) and we want to
+// skip the subsequent checks.
+short jmp_size = 1 + (check_access ? 3 : 0) + (check_type ? 1 : 0) +
+ (check_major ? 1 : 0) + (check_minor ? 1 : 0);
+
+// Check type (r2) against entry.
+if (check_type) {
+  int bpf_type = selector.type == Entry::Selector::Type::BLOCK
+   ? BPF_DEVCG_DEV_BLOCK
+   : BPF_DEVCG_DEV_CHAR;
+
+  program.append({ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, jmp_size) });
+  --jmp_size;
+}
+// Check access (r3) against entry.
+if (check_access) {
+  int bpf_access = 0;
+  bpf_access |= access.read ? BPF_DEVCG_ACC_READ : 0;
+  bpf_access |= access.write ? BPF_DEVCG_ACC_WRITE : 0;
+  bpf_access |= access.mknod ? BPF_DEVCG_ACC_MKNOD : 0;
+
+ program.append({
+  BPF_MOV32_REG (BPF_REG_1, BPF_REG_3),
+  BPF_ALU32_IMM (BPF_AND, BPF_REG_1, bpf_access),
+  BPF_JMP_REG (BPF_JNE, BPF_REG_1, BPF_REG_3, (short) (jmp_size - 
2))});
+  jmp_size -= 3;
+}
+// Check major version (r4) against entry.
+if (check_major) {
+  program.append({
+  BPF_JMP_IMM(BPF_JNE, BPF_REG_4, (int) selector.major.get(), 
jmp_size)});

Review Comment:
   would be good to explain these instructions in general, e.g.
   
   ```
   // Jump to XXX if major (r4) != selector.major.
   ```
   
   Are we jumping *past* the first instruction of the allow / deny access block 
below? If so, it seems like it makes sense to separate out the append of the 
BPF_EXIT_INSN instruction and make it more clear in the comments here that 
we're jumping directly to that?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@mesos.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



Re: [PR] [cgroups2][ebpf] Configure device access permissions. [mesos]

2024-03-18 Thread via GitHub


bmahler commented on code in PR #519:
URL: https://github.com/apache/mesos/pull/519#discussion_r1528930282


##
src/tests/containerizer/cgroups2_tests.cpp:
##
@@ -59,6 +63,30 @@ TEST_F(Cgroups2Test, ROOT_CGROUPS2_AvailableSubsystems)
   }
 }
 
+
+TEST_F(Cgroups2Test, ROOT_CGROUPS2_DeviceController)
+{
+  namespace devices = cgroups2::devices;
+
+  // Deny all access.
+  devices::Entry entry = devices::Entry::parse("a *:* rwm").get();

Review Comment:
   Let's ASSERT_SOME on this rather than doing a naked .get(), later code can 
use * instead of .get().



##
src/tests/containerizer/cgroups2_tests.cpp:
##
@@ -59,6 +63,30 @@ TEST_F(Cgroups2Test, ROOT_CGROUPS2_AvailableSubsystems)
   }
 }
 
+
+TEST_F(Cgroups2Test, ROOT_CGROUPS2_DeviceController)
+{
+  namespace devices = cgroups2::devices;
+
+  // Deny all access.
+  devices::Entry entry = devices::Entry::parse("a *:* rwm").get();
+
+  EXPECT_SOME(devices::configure(cgroups2::ROOT_CGROUP, {}, {entry}));
+  Try> attached = ebpf::cgroups2::attached(
+  cgroups2::ROOT_CGROUP);

Review Comment:
   Is this modifying the root cgroup in a test? We don't want to do that since 
that's going to mess with other things on the system that run in the root 
cgroup? Instead we'd want to create a test cgroup and clean it up properly 
afterwards.



##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)

Review Comment:
   do you want this private or protected?



##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+
+  Try allow(const Entry entry) { return addDevice(entry, true); }
+
+  Try deny(const Entry entry) { return addDevice(entry, false); }
+
+  Try addDevice(const Entry entry, bool allow)
+  {
+// We create a block of bytecode with the format:
+// 1. [ Type Check  ] ⤵
+// 2. [ Access Check] ⤵
+// 3. [ Minor Version Check ] ⤵
+// 4. [ Major Version Check ] ⤵
+// 5. [ Allow/Deny Access   ]  ↓
+// ↓
+// 6. [ NEXT BLOCK  ] ←←

Review Comment:
   can you remove the unicode characters? maybe this works, but I don't think 
we've added any non-ASCII to our current files.. have we?



##
src/linux/cgroups2.cpp:
##
@@ -295,4 +296,152 @@ Try> enabled(const string& cgroup)
 
 } // namespace controllers {
 
+namespace devices {
+
+const int ALLOW_ACCESS = 1;
+
+const int DENY_ACCESS = 0;
+
+// Utility class to construct an eBPF program to whitelist or blacklist
+// select device accesses.
+class DeviceProgram
+{
+public:
+  DeviceProgram() : program{ebpf::Program(BPF_PROG_TYPE_CGROUP_DEVICE)}
+  {
+program.append({
+// r2: Type ('c', 'b', '?')
+BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0x),
+// r3: Access ('r', 'w', 'm')
+BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+// r4: Major Version
+BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4),
+// r5: Minor Version
+BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 8)});
+  }
+