> On July 10, 2024, 8:18 p.m., Benjamin Mahler wrote:
> > Ship It!
```
diff --git a/src/linux/cgroups2.cpp b/src/linux/cgroups2.cpp
index 9ae8df387..59c212ebc 100644
--- a/src/linux/cgroups2.cpp
+++ b/src/linux/cgroups2.cpp
@@ -1164,7 +1164,8 @@ public:
// |Exit instruction to deny access |
// +-------------------------------------------------------------+
static Try<ebpf::Program> build(
- const vector<Entry>& allow, const vector<Entry>& deny)
+ const vector<Entry>& allow,
+ const vector<Entry>& deny)
{
// The BPF_PROG_TYPE_CGROUP_DEVICE program takes in
// `struct bpf_cgroup_dev_ctx*` as input. We extract the fields into
@@ -1209,25 +1210,22 @@ public:
return program;
}
- auto allow_block_trailer =
- [](short jmp_size_to_deny_section) -> vector<bpf_insn> {
- return {BPF_JMP_A(jmp_size_to_deny_section)};
+ auto allow_block_trailer = [](short jmp_size_to_deny_section) {
+ return vector<bpf_insn>({BPF_JMP_A(jmp_size_to_deny_section)});
};
- auto allow_section_trailer = []() -> vector<bpf_insn> {
- return {BPF_EXIT_INSN()};
+ auto allow_section_trailer = []() {
+ return vector<bpf_insn>({BPF_EXIT_INSN()});
};
- auto deny_block_trailer = []() -> vector<bpf_insn> {
- return {BPF_EXIT_INSN()};
+ auto deny_block_trailer = []() {
+ return vector<bpf_insn>({BPF_EXIT_INSN()});
};
- auto deny_section_trailer = []() -> vector<bpf_insn> {
- return {
+ auto deny_section_trailer = []() {
+ return vector<bpf_insn>({
BPF_MOV64_IMM(BPF_REG_0, ALLOW_ACCESS),
BPF_EXIT_INSN(),
- };
+ });
};
- // We will use the catch-all for allow to determine if we should skip
- // generating the allow section.
bool allow_catch_all = [&allow]() {
foreach (const Entry& entry, allow) {
if (entry.is_catch_all()) {
@@ -1238,24 +1236,25 @@ public:
}();
// We will only add the code for the allow section if there is no catch-all
- // allow entry present. If there is a catch-all, we wll skip everything
+ // allow entry present. If there is a catch-all, we will skip everything
// in the allow section, including exit instruction at the end,
// since we just need to check if the device is explicitly denied.
if (!allow_catch_all) {
// We calculate the total jump distance to skip over trailer instructions
// at the end of the allow section, we initialize jump size to length of
- // said instructions, then add the lengths of individual allow blocks to
it.
+ // said instructions, then add the lengths of individual allow blocks.
short start_of_deny_jmp_size = allow_section_trailer().size();
vector<vector<bpf_insn>> allow_device_check_blocks = {};
short allow_block_trailer_size = allow_block_trailer(0).size();
foreach (const Entry& entry, allow) {
- vector<bpf_insn> device_check_block =
+ vector<bpf_insn> allow_block =
add_device_checks(entry, allow_block_trailer_size);
- allow_device_check_blocks.push_back(device_check_block);
- start_of_deny_jmp_size +=
- device_check_block.size() + allow_block_trailer_size;
+ allow_device_check_blocks.push_back(allow_block);
+
+ start_of_deny_jmp_size += allow_block.size() +
allow_block_trailer_size;
}
+
foreach (vector<bpf_insn>& allow_block, allow_device_check_blocks) {
start_of_deny_jmp_size -=
(allow_block.size() + allow_block_trailer_size);
@@ -1278,10 +1277,9 @@ public:
// To reach this block, we must have matched with an entry in allow
// to jump over the exit instruction at the end of allow blocks,
- // or there is a catch-all in allow.
- // We will also have to have not matched with any of the deny
- // entries to avoid their exit instructions. Meaning that
- // the device is on the allow list, and not on the deny list.
+ // or there is a catch-all in allow. We will also have to have not
+ // matched with any of the deny entries to avoid their exit instructions.
+ // Meaning that the device is on the allow list, and not on the deny list.
// Hence, we grant them access.
program.append(deny_section_trailer());
@@ -1290,7 +1288,8 @@ public:
private:
static vector<bpf_insn> add_device_checks(
- const Entry& entry, short trailer_length)
+ const Entry& entry,
+ short trailer_length)
{
// We create a block of bytecode with the format:
// 1. Major Version Check
@@ -1307,34 +1306,32 @@ private:
// 2. One of (1,2,3,4) does not match the requested access and we skip
// the rest of the current block
- vector<bpf_insn> device_check_block = {};
if (entry.is_catch_all()) {
- return device_check_block;
+ return {};
}
- auto check_major_instructions =
- [](short jmp_size, int major) -> vector<bpf_insn> {
- return {
+ auto check_major_instructions = [](short jmp_size, int major) {
+ return vector<bpf_insn>({
BPF_JMP_IMM(
BPF_JNE, BPF_REG_4, major, jmp_size),
- };
+ });
};
- auto check_minor_instructions =
- [](short jmp_size, int minor) -> vector<bpf_insn> {
- return {
+ auto check_minor_instructions = [](short jmp_size, int minor) {
+ return vector<bpf_insn>({
BPF_JMP_IMM(
BPF_JNE, BPF_REG_5, minor, jmp_size),
- };
+ });
};
auto check_access_instructions =
- [](short jmp_size, const Entry::Access& access) -> vector<bpf_insn> {
+ [](short jmp_size, const Entry::Access& access)
+ {
int bpf_access = 0;
bpf_access |= access.read ? BPF_DEVCG_ACC_READ : 0;
bpf_access |= access.write ? BPF_DEVCG_ACC_WRITE : 0;
bpf_access |= access.mknod ? BPF_DEVCG_ACC_MKNOD : 0;
- return {
+ return vector<bpf_insn>({
BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
BPF_ALU32_IMM(BPF_AND, BPF_REG_1, bpf_access),
BPF_JMP_REG(
@@ -1342,7 +1339,7 @@ private:
BPF_REG_1,
BPF_REG_3,
static_cast<short>(jmp_size - 2)),
- };
+ });
};
auto check_type_instructions =
@@ -1383,6 +1380,7 @@ private:
// instruction to land at the beginning of the next entry-block
nxt_blk_jmp_size -= 1;
+ vector<bpf_insn> device_check_block = {};
// 1. Check major version (r4) against entry.
if (check_major) {
```
- Benjamin
-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/75026/#review226649
-----------------------------------------------------------
On July 9, 2024, 11:30 p.m., Jason Zhou wrote:
>
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/75026/
> -----------------------------------------------------------
>
> (Updated July 9, 2024, 11:30 p.m.)
>
>
> Review request for mesos and Benjamin Mahler.
>
>
> Repository: mesos
>
>
> Description
> -------
>
> Currently, the EBPF program we generate has the behavior where the deny
> list has no effect, as we will allow device access iff the device
> matched with an allow entry.
>
> Instead we want to grant access to a device iff it is in a cgroup's
> allow list *and not in its deny list.*
>
> This means that we need to change our existing logic, which exits on the
> first match. It is not our desired behavior because the current EBPF
> program construction logic puts the allow-device checks before the
> deny-device checks, meaning that if a device is on both allow and deny
> lists for a cgroup, it will be granted access.
>
> This change revamps the EBPF program construction to now check both the
> allow and deny list of a cgroup before determining whether access may be
> granted. Specifically, if a device is matched with an entry inside the
> allow list, we will also be checking if it matches with any entry on
> the deny list, and deny the device's access if that is the case.
>
> We also avoid generating specific parts of the EBPF program code to
> avoid creating unreachable code, explanations with a diagram are
> attached above the cgroups2::devices::DeviceProgram::build function.
>
>
> Diffs
> -----
>
> src/linux/cgroups.hpp 9be53e31e46129f019deeda5b7690f3b4e4004ce
> src/linux/cgroups.cpp c1272fbcac9926a378b0d8f69263be214fc21c5e
> src/linux/cgroups2.hpp 64254d04f65128713cf3489b25bcba42590b6020
> src/linux/cgroups2.cpp d1fc2638cdf9a07199f90952e04998072021011c
> src/tests/containerizer/cgroups2_tests.cpp
> cb1e229f7f40aa71f57417c33fccb2cfb313a1f5
>
>
> Diff: https://reviews.apache.org/r/75026/diff/12/
>
>
> Testing
> -------
>
> All Cgroups2 tests pass i.e. the generated ebpf files pass the verifiers,
> tests added for new behavior for when device is on both allow and deny list,
> and test that mismatched entries are ignored.
>
>
> Thanks,
>
> Jason Zhou
>
>