Github user squito commented on a diff in the pull request:

    https://github.com/apache/spark/pull/15604#discussion_r94499385
  
    --- Diff: core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala ---
    @@ -178,4 +180,97 @@ class PoolSuite extends SparkFunSuite with 
LocalSparkContext {
         scheduleTaskAndVerifyId(2, rootPool, 6)
         scheduleTaskAndVerifyId(3, rootPool, 2)
       }
    +
    +  test("SPARK-18066: FIFO Scheduler just uses root pool") {
    +    sc = new SparkContext("local", "PoolSuite")
    +    val taskScheduler = new TaskSchedulerImpl(sc)
    +
    +    val rootPool = new Pool("", SchedulingMode.FIFO, initMinShare = 0, 
initWeight = 0)
    +    val schedulableBuilder = new FIFOSchedulableBuilder(rootPool)
    +
    +    val taskSetManager0 = createTaskSetManager(stageId = 0, numTasks = 1, 
taskScheduler)
    +    val taskSetManager1 = createTaskSetManager(stageId = 1, numTasks = 1, 
taskScheduler)
    +
    +    val properties = new Properties()
    +    properties.setProperty("spark.scheduler.pool", TEST_POOL)
    +
    +    // FIFOSchedulableBuilder just uses rootPool so even if properties are 
set, related pool
    +    // (testPool) is not created and TaskSetManagers are added to rootPool
    +    schedulableBuilder.addTaskSetManager(taskSetManager0, properties)
    +    schedulableBuilder.addTaskSetManager(taskSetManager1, properties)
    +
    +    assert(rootPool.getSchedulableByName(TEST_POOL) == null)
    +    assert(rootPool.schedulableQueue.size == 2)
    +    assert(rootPool.getSchedulableByName(taskSetManager0.name) === 
taskSetManager0)
    +    assert(rootPool.getSchedulableByName(taskSetManager1.name) === 
taskSetManager1)
    +  }
    +
    +  test("SPARK-18066: FAIR Scheduler uses default pool when 
spark.scheduler.pool property is not " +
    +    "set") {
    +    sc = new SparkContext("local", "PoolSuite")
    +    val taskScheduler = new TaskSchedulerImpl(sc)
    +
    +    val rootPool = new Pool("", SchedulingMode.FAIR, initMinShare = 0, 
initWeight = 0)
    +    val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf)
    +    schedulableBuilder.buildPools()
    +
    +    // FAIR Scheduler uses default pool when pool properties are null
    +    val taskSetManager0 = createTaskSetManager(stageId = 0, numTasks = 1, 
taskScheduler)
    +
    +    schedulableBuilder.addTaskSetManager(taskSetManager0, null)
    +
    +    val defaultPool = 
rootPool.getSchedulableByName(schedulableBuilder.DEFAULT_POOL_NAME)
    +    assert(defaultPool != null)
    +    assert(defaultPool.schedulableQueue.size == 1)
    +    assert(defaultPool.getSchedulableByName(taskSetManager0.name) === 
taskSetManager0)
    +
    +    // FAIR Scheduler uses default pool when spark.scheduler.pool property 
is not set
    +    val taskSetManager1 = createTaskSetManager(stageId = 1, numTasks = 1, 
taskScheduler)
    +
    +    schedulableBuilder.addTaskSetManager(taskSetManager1, new Properties())
    +
    +    assert(defaultPool.schedulableQueue.size == 2)
    +    assert(defaultPool.getSchedulableByName(taskSetManager1.name) === 
taskSetManager1)
    +
    +    // FAIR Scheduler uses default pool when spark.scheduler.pool property 
is set as default pool
    +    val taskSetManager2 = createTaskSetManager(stageId = 2, numTasks = 1, 
taskScheduler)
    +
    +    val properties = new Properties()
    +    properties.setProperty(schedulableBuilder.FAIR_SCHEDULER_PROPERTIES, 
schedulableBuilder
    +      .DEFAULT_POOL_NAME)
    +
    +    schedulableBuilder.addTaskSetManager(taskSetManager2, properties)
    +
    +    assert(defaultPool.schedulableQueue.size == 3)
    +    assert(defaultPool.getSchedulableByName(taskSetManager2.name) === 
taskSetManager2)
    +  }
    +
    +  test("SPARK-18066: FAIR Scheduler creates a new pool when 
spark.scheduler.pool property points " +
    +    "non-existent") {
    --- End diff --
    
    minor nit: rename to "FAIR Scheduler creates a new pool when 
spark.scheduler.pool property points to a non-existent pool"
    
    bigger question:
    @markhamstra @kayousterhout Is this really the desired behavior?  Or is 
there a bug -- should it fail fast?  It looks like [this behavior was 
intentional](https://github.com/apache/spark/blob/b67b35f76b684c5176dc683e7491fd01b43f4467/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala#L143-L145).
  Then again, that comment makes it sound like the user is going to directly 
modify the `weight` and `minShare` of the constructed pool, but although [they 
are 
`var`s](https://github.com/apache/spark/blob/b67b35f76b684c5176dc683e7491fd01b43f4467/core/src/main/scala/org/apache/spark/scheduler/Pool.scala#L40-L41),
 they are `private[spark]` and there isn't anything else exposing them to the user.
    
    I prefer fail-fast behavior, but it seems like fair scheduler configuration 
prefers to assume defaults when there is misconfiguration.  If you think this 
is the right behavior, then there is probably some minor cleanup to do in Pool & 
SchedulableBuilder to clarify. 


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to