[ 
https://issues.apache.org/jira/browse/MESOS-7385?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Jay Guo updated MESOS-7385:
---------------------------
    Description: 
Mesos currently implements the naive H-DRF algorithm described in the [H-DRF 
paper|https://people.eecs.berkeley.edu/~alig/papers/h-drf.pdf], which may cause 
starvation due to `dovetailing`. Essentially, the following test should pass:
{code}
// Reproduces the starvation (`dovetailing`) case of the naive H-DRF
// implementation: `framework3` in role `b/d` holds no resources, yet the
// final allocation this test expects for it is not made (the last
// assertion currently fails).
TEST_F(HierarchicalAllocatorTest, Starvation)
{
  Clock::pause();

  initialize();

  // `ROLE2` and `ROLE3` are sibling child roles of the parent role `b`;
  // `ROLE1` is a separate role outside of that subtree.
  const string ROLE1 = "a";
  const string ROLE2 = "b/c";
  const string ROLE3 = "b/d";

  // Register `framework1` under `ROLE1` and add an agent with one CPU.
  FrameworkInfo framework1 = createFrameworkInfo({ROLE1});
  allocator->addFramework(framework1.id(), framework1, {}, true);

  SlaveInfo agent1 = createSlaveInfo("cpus:1");
  allocator->addSlave(
      agent1.id(),
      agent1,
      AGENT_CAPABILITIES(),
      None(),
      agent1.resources(),
      {});

  // `framework1` is the only framework so far, so it will be offered
  // all of the resources on `agent1`.
  {
    Allocation expected = Allocation(
        framework1.id(),
        {{ROLE1, {{agent1.id(), agent1.resources()}}}});

    AWAIT_EXPECT_EQ(expected, allocations.get());
  }

  // Create `framework2` in the child role `b/c` and add an agent that
  // provides only memory.
  FrameworkInfo framework2 = createFrameworkInfo({ROLE2});
  allocator->addFramework(framework2.id(), framework2, {}, true);

  SlaveInfo agent2 = createSlaveInfo("mem:32");
  allocator->addSlave(
      agent2.id(),
      agent2,
      AGENT_CAPABILITIES(),
      None(),
      agent2.resources(),
      {});

  // `framework2` will be offered all of the resources on `agent2`.
  {
    Allocation expected = Allocation(
        framework2.id(),
        {{ROLE2, {{agent2.id(), agent2.resources()}}}});

    AWAIT_EXPECT_EQ(expected, allocations.get());
  }

  // Create `framework3` in the sibling child role `b/d` and add a second
  // agent with one CPU.
  FrameworkInfo framework3 = createFrameworkInfo({ROLE3});
  allocator->addFramework(framework3.id(), framework3, {}, true);

  SlaveInfo agent3 = createSlaveInfo("cpus:1");
  allocator->addSlave(
      agent3.id(),
      agent3,
      AGENT_CAPABILITIES(),
      None(),
      agent3.resources(),
      {});

  // Current fair share is:
  // - `framework1`: 50% (1/2 cpus)
  // - `framework2`: 100% (32/32 mem)
  // - `framework3`: 0% (0/2 cpus)
  // So `framework3` should be offered all of the resources on `agent3`.
  // However, `framework3` is penalized by the naive H-DRF implementation:
  // its parent role `b` has a fair share of 100% (because of
  // `framework2`), which leads to starvation.
  {
    Allocation expected = Allocation(
        framework3.id(),
        {{ROLE3, {{agent3.id(), agent3.resources()}}}});

    AWAIT_EXPECT_EQ(expected, allocations.get()); // Currently fails.
  }
}
{code}

This JIRA was created to make sure this behavior is captured and will be 
addressed in the future. Note that it also affects the current implementation 
even when hierarchical roles are not used.

  was:
Mesos currently implements naive H-DRF algorithm, as described in [h-drf 
paper|https://people.eecs.berkeley.edu/~alig/papers/h-drf.pdf], which may incur 
starvation due to `dovetailing`. Essentially, following test should pass:
{{code}}
TEST_F(HierarchicalAllocatorTest, Starvation)
{
  Clock::pause();

  initialize();

  const string ROLE1 = "a";
  const string ROLE2 = "b/c";
  const string ROLE3 = "b/d";

  FrameworkInfo framework1 = createFrameworkInfo({ROLE1});
  allocator->addFramework(framework1.id(), framework1, {}, true);

  SlaveInfo agent1 = createSlaveInfo("cpus:1");
  allocator->addSlave(
      agent1.id(),
      agent1,
      AGENT_CAPABILITIES(),
      None(),
      agent1.resources(),
      {});

  // `framework1` will be offered all of the resources on `agent1`.
  {
    Allocation expected = Allocation(
        framework1.id(),
        {{ROLE1, {{agent1.id(), agent1.resources()}}}});

    AWAIT_EXPECT_EQ(expected, allocations.get());
  }

  // Create `framework2` in the child role.
  FrameworkInfo framework2 = createFrameworkInfo({ROLE2});
  allocator->addFramework(framework2.id(), framework2, {}, true);

  SlaveInfo agent2 = createSlaveInfo("mem:32");
  allocator->addSlave(
      agent2.id(),
      agent2,
      AGENT_CAPABILITIES(),
      None(),
      agent2.resources(),
      {});

  {
    Allocation expected = Allocation(
        framework2.id(),
        {{ROLE2, {{agent2.id(), agent2.resources()}}}});

    AWAIT_EXPECT_EQ(expected, allocations.get());
  }

  // Create `framework3` in the child role.
  FrameworkInfo framework3 = createFrameworkInfo({ROLE3});
  allocator->addFramework(framework3.id(), framework3, {}, true);

  SlaveInfo agent3 = createSlaveInfo("cpus:1");
  allocator->addSlave(
      agent3.id(),
      agent3,
      AGENT_CAPABILITIES(),
      None(),
      agent3.resources(),
      {});

  // Current fair share is:
  // - `framework1`: 50% (1/2 cpus)
  // - `framework2`: 100% (32/32 mem)
  // - `framework3`: 0% (0/2 cpus)
  // So `framework3` should be offered all of the resources on `agent3`.
  // However, `framework3` is punished due to naive h-drf implementation,
  // where fair share of parent role `b` has fair share of 100%, which
  // leads to starvation.
  {
    Allocation expected = Allocation(
        framework3.id(),
        {{ROLE3, {{agent3.id(), agent3.resources()}}}});

    AWAIT_EXPECT_EQ(expected, allocations.get()); // It fails!
  }
}
{{code}}

This JIRA is created to make sure this behavior is captured and will be 
addressed in the future. Note that it affects current implementation without 
hierarchical role as well.


> Framework should not starve due to `dovetailing` in naive H-DRF 
> implementation.
> -------------------------------------------------------------------------------
>
>                 Key: MESOS-7385
>                 URL: https://issues.apache.org/jira/browse/MESOS-7385
>             Project: Mesos
>          Issue Type: Bug
>          Components: master
>            Reporter: Jay Guo
>
> Mesos currently implements the naive H-DRF algorithm described in the [H-DRF 
> paper|https://people.eecs.berkeley.edu/~alig/papers/h-drf.pdf], which may 
> cause starvation due to `dovetailing`. Essentially, the following test should 
> pass:
> {code}
> TEST_F(HierarchicalAllocatorTest, Starvation)
> {
>   Clock::pause();
>   initialize();
>   const string ROLE1 = "a";
>   const string ROLE2 = "b/c";
>   const string ROLE3 = "b/d";
>   FrameworkInfo framework1 = createFrameworkInfo({ROLE1});
>   allocator->addFramework(framework1.id(), framework1, {}, true);
>   SlaveInfo agent1 = createSlaveInfo("cpus:1");
>   allocator->addSlave(
>       agent1.id(),
>       agent1,
>       AGENT_CAPABILITIES(),
>       None(),
>       agent1.resources(),
>       {});
>   // `framework1` will be offered all of the resources on `agent1`.
>   {
>     Allocation expected = Allocation(
>         framework1.id(),
>         {{ROLE1, {{agent1.id(), agent1.resources()}}}});
>     AWAIT_EXPECT_EQ(expected, allocations.get());
>   }
>   // Create `framework2` in the child role.
>   FrameworkInfo framework2 = createFrameworkInfo({ROLE2});
>   allocator->addFramework(framework2.id(), framework2, {}, true);
>   SlaveInfo agent2 = createSlaveInfo("mem:32");
>   allocator->addSlave(
>       agent2.id(),
>       agent2,
>       AGENT_CAPABILITIES(),
>       None(),
>       agent2.resources(),
>       {});
>   {
>     Allocation expected = Allocation(
>         framework2.id(),
>         {{ROLE2, {{agent2.id(), agent2.resources()}}}});
>     AWAIT_EXPECT_EQ(expected, allocations.get());
>   }
>   // Create `framework3` in the child role.
>   FrameworkInfo framework3 = createFrameworkInfo({ROLE3});
>   allocator->addFramework(framework3.id(), framework3, {}, true);
>   SlaveInfo agent3 = createSlaveInfo("cpus:1");
>   allocator->addSlave(
>       agent3.id(),
>       agent3,
>       AGENT_CAPABILITIES(),
>       None(),
>       agent3.resources(),
>       {});
>   // Current fair share is:
>   // - `framework1`: 50% (1/2 cpus)
>   // - `framework2`: 100% (32/32 mem)
>   // - `framework3`: 0% (0/2 cpus)
>   // So `framework3` should be offered all of the resources on `agent3`.
>   // However, `framework3` is punished due to naive h-drf implementation,
>   // where fair share of parent role `b` has fair share of 100%, which
>   // leads to starvation.
>   {
>     Allocation expected = Allocation(
>         framework3.id(),
>         {{ROLE3, {{agent3.id(), agent3.resources()}}}});
>     AWAIT_EXPECT_EQ(expected, allocations.get()); // It fails!
>   }
> }
> {code}
> This JIRA was created to make sure this behavior is captured and will be 
> addressed in the future. Note that it also affects the current implementation 
> even when hierarchical roles are not used.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Reply via email to