> On July 10, 2024, 8:18 p.m., Benjamin Mahler wrote:
> > Ship It!

```
diff --git a/src/linux/cgroups2.cpp b/src/linux/cgroups2.cpp
index 9ae8df387..59c212ebc 100644
--- a/src/linux/cgroups2.cpp
+++ b/src/linux/cgroups2.cpp
@@ -1164,7 +1164,8 @@ public:
   // |Exit instruction to deny access                              |
   // +-------------------------------------------------------------+
   static Try<ebpf::Program> build(
-    const vector<Entry>& allow, const vector<Entry>& deny)
+      const vector<Entry>& allow,
+         const vector<Entry>& deny)
   {
     // The BPF_PROG_TYPE_CGROUP_DEVICE program takes in
     // `struct bpf_cgroup_dev_ctx*` as input. We extract the fields into
@@ -1209,25 +1210,22 @@ public:
       return program;
     }
 
-    auto allow_block_trailer =
-      [](short jmp_size_to_deny_section) -> vector<bpf_insn> {
-      return {BPF_JMP_A(jmp_size_to_deny_section)};
+    auto allow_block_trailer = [](short jmp_size_to_deny_section) {
+      return vector<bpf_insn>({BPF_JMP_A(jmp_size_to_deny_section)});
     };
-    auto allow_section_trailer = []() -> vector<bpf_insn> {
-      return {BPF_EXIT_INSN()};
+    auto allow_section_trailer = []() {
+      return vector<bpf_insn>({BPF_EXIT_INSN()});
     };
-    auto deny_block_trailer = []() -> vector<bpf_insn> {
-      return {BPF_EXIT_INSN()};
+    auto deny_block_trailer = []() {
+      return vector<bpf_insn>({BPF_EXIT_INSN()});
     };
-    auto deny_section_trailer = []() -> vector<bpf_insn> {
-      return {
+    auto deny_section_trailer = []() {
+      return vector<bpf_insn>({
         BPF_MOV64_IMM(BPF_REG_0, ALLOW_ACCESS),
         BPF_EXIT_INSN(),
-      };
+      });
     };
 
-    // We will use the catch-all for allow to determine if we should skip
-    // generating the allow section.
     bool allow_catch_all = [&allow]() {
       foreach (const Entry& entry, allow) {
         if (entry.is_catch_all()) {
@@ -1238,24 +1236,25 @@ public:
     }();
 
     // We will only add the code for the allow section if there is no catch-all
-    // allow entry present. If there is a catch-all, we wll skip everything
+    // allow entry present. If there is a catch-all, we will skip everything
     // in the allow section, including exit instruction at the end,
     // since we just need to check if the device is explicitly denied.
     if (!allow_catch_all) {
       // We calculate the total jump distance to skip over trailer instructions
       // at the end of the allow section, we initialize jump size to length of
-      // said instructions, then add the lengths of individual allow blocks to 
it.
+      // said instructions, then add the lengths of individual allow blocks.
       short start_of_deny_jmp_size = allow_section_trailer().size();
       vector<vector<bpf_insn>> allow_device_check_blocks = {};
       short allow_block_trailer_size = allow_block_trailer(0).size();
 
       foreach (const Entry& entry, allow) {
-        vector<bpf_insn> device_check_block =
+        vector<bpf_insn> allow_block =
           add_device_checks(entry, allow_block_trailer_size);
-        allow_device_check_blocks.push_back(device_check_block);
-        start_of_deny_jmp_size +=
-          device_check_block.size() + allow_block_trailer_size;
+        allow_device_check_blocks.push_back(allow_block);
+
+        start_of_deny_jmp_size += allow_block.size() + 
allow_block_trailer_size;
       }
+
       foreach (vector<bpf_insn>& allow_block, allow_device_check_blocks) {
         start_of_deny_jmp_size -=
           (allow_block.size() + allow_block_trailer_size);
@@ -1278,10 +1277,9 @@ public:
 
     // To reach this block, we must have matched with an entry in allow
     // to jump over the exit instruction at the end of allow blocks,
-    // or there is a catch-all in allow.
-    // We will also have to have not matched with any of the deny
-    // entries to avoid their exit instructions. Meaning that
-    // the device is on the allow list, and not on the deny list.
+    // or there is a catch-all in allow. We will also have to have not
+    // matched with any of the deny entries to avoid their exit instructions.
+    // Meaning that the device is on the allow list, and not on the deny list.
     // Hence, we grant them access.
     program.append(deny_section_trailer());
 
@@ -1290,7 +1288,8 @@ public:
 
 private:
   static vector<bpf_insn> add_device_checks(
-    const Entry& entry, short trailer_length)
+      const Entry& entry,
+         short trailer_length)
   {
     // We create a block of bytecode with the format:
     // 1. Major Version Check
@@ -1307,34 +1306,32 @@ private:
     // 2. One of (1,2,3,4) does not match the requested access and we skip
     //    the rest of the current block
 
-    vector<bpf_insn> device_check_block = {};
     if (entry.is_catch_all()) {
-      return device_check_block;
+      return {};
     }
 
-    auto check_major_instructions =
-      [](short jmp_size, int major) -> vector<bpf_insn> {
-      return {
+    auto check_major_instructions = [](short jmp_size, int major) {
+      return vector<bpf_insn>({
           BPF_JMP_IMM(
             BPF_JNE, BPF_REG_4, major, jmp_size),
-        };
+        });
     };
 
-    auto check_minor_instructions =
-      [](short jmp_size, int minor) -> vector<bpf_insn> {
-      return {
+    auto check_minor_instructions = [](short jmp_size, int minor) {
+      return vector<bpf_insn>({
           BPF_JMP_IMM(
             BPF_JNE, BPF_REG_5, minor, jmp_size),
-        };
+        });
     };
 
     auto check_access_instructions =
-      [](short jmp_size, const Entry::Access& access) -> vector<bpf_insn> {
+      [](short jmp_size, const Entry::Access& access)
+       {
       int bpf_access = 0;
       bpf_access |= access.read ? BPF_DEVCG_ACC_READ : 0;
       bpf_access |= access.write ? BPF_DEVCG_ACC_WRITE : 0;
       bpf_access |= access.mknod ? BPF_DEVCG_ACC_MKNOD : 0;
-      return {
+      return vector<bpf_insn>({
           BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
           BPF_ALU32_IMM(BPF_AND, BPF_REG_1, bpf_access),
           BPF_JMP_REG(
@@ -1342,7 +1339,7 @@ private:
             BPF_REG_1,
             BPF_REG_3,
             static_cast<short>(jmp_size - 2)),
-        };
+        });
     };
 
     auto check_type_instructions =
@@ -1383,6 +1380,7 @@ private:
     // instruction to land at the beginning of the next entry-block
     nxt_blk_jmp_size -= 1;
 
+    vector<bpf_insn> device_check_block = {};
 
     // 1. Check major version (r4) against entry.
     if (check_major) {

```


- Benjamin


-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/75026/#review226649
-----------------------------------------------------------


On July 9, 2024, 11:30 p.m., Jason Zhou wrote:
> 
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/75026/
> -----------------------------------------------------------
> 
> (Updated July 9, 2024, 11:30 p.m.)
> 
> 
> Review request for mesos and Benjamin Mahler.
> 
> 
> Repository: mesos
> 
> 
> Description
> -------
> 
> Currently, the EBPF program we generate has the behavior where the deny
> list has no effect, as we will allow device access iff the device
> matched with an allow entry.
> 
> Instead we want to grant access to a device iff it is in a cgroup's
> allow list *and not in its deny list.*
> 
> This means that we need to change our existing logic, which exits on the
> first match. It is not our desired behavior because the current EBPF
> program construction logic puts the allow-device checks before the
> deny-device checks, meaning that if a device is on both allow and deny
> lists for a cgroup, it will be granted access.
> 
> This change revamps the EBPF program construction to now check both the
> allow and deny list of a cgroup before determining whether access may be
> granted. Specifically, if a device is matched with an entry inside the
> allow list, we will also be checking if it matches with any entry on
> the deny list, and deny the device's access if that is the case.
> 
> We also avoid generating specific parts of the EBPF program code to
> avoid creating unreachable code, explanations with a diagram are
> attached above the cgroups2::devices::DeviceProgram::build function.
> 
> 
> Diffs
> -----
> 
>   src/linux/cgroups.hpp 9be53e31e46129f019deeda5b7690f3b4e4004ce 
>   src/linux/cgroups.cpp c1272fbcac9926a378b0d8f69263be214fc21c5e 
>   src/linux/cgroups2.hpp 64254d04f65128713cf3489b25bcba42590b6020 
>   src/linux/cgroups2.cpp d1fc2638cdf9a07199f90952e04998072021011c 
>   src/tests/containerizer/cgroups2_tests.cpp 
> cb1e229f7f40aa71f57417c33fccb2cfb313a1f5 
> 
> 
> Diff: https://reviews.apache.org/r/75026/diff/12/
> 
> 
> Testing
> -------
> 
> All Cgroups2 tests pass i.e. the generated ebpf files pass the verifiers, 
> tests added for new behavior for when device is on both allow and deny list, 
> and test that mismatched entries are ignored.
> 
> 
> Thanks,
> 
> Jason Zhou
> 
>

Reply via email to