Github user kayousterhout commented on a diff in the pull request:

    https://github.com/apache/spark/pull/15604#discussion_r94505365
  
    --- Diff: core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala ---
    @@ -178,4 +180,97 @@ class PoolSuite extends SparkFunSuite with 
LocalSparkContext {
         scheduleTaskAndVerifyId(2, rootPool, 6)
         scheduleTaskAndVerifyId(3, rootPool, 2)
       }
    +
    +  test("SPARK-18066: FIFO Scheduler just uses root pool") {
    +    sc = new SparkContext("local", "PoolSuite")
    +    val taskScheduler = new TaskSchedulerImpl(sc)
    +
    +    val rootPool = new Pool("", SchedulingMode.FIFO, initMinShare = 0, 
initWeight = 0)
    +    val schedulableBuilder = new FIFOSchedulableBuilder(rootPool)
    +
    +    val taskSetManager0 = createTaskSetManager(stageId = 0, numTasks = 1, 
taskScheduler)
    +    val taskSetManager1 = createTaskSetManager(stageId = 1, numTasks = 1, 
taskScheduler)
    +
    +    val properties = new Properties()
    +    properties.setProperty("spark.scheduler.pool", TEST_POOL)
    +
    +    // FIFOSchedulableBuilder just uses rootPool so even if properties are 
set, related pool
    +    // (testPool) is not created and TaskSetManagers are added to rootPool
    +    schedulableBuilder.addTaskSetManager(taskSetManager0, properties)
    +    schedulableBuilder.addTaskSetManager(taskSetManager1, properties)
    +
    +    assert(rootPool.getSchedulableByName(TEST_POOL) == null)
    +    assert(rootPool.schedulableQueue.size == 2)
    +    assert(rootPool.getSchedulableByName(taskSetManager0.name) === 
taskSetManager0)
    +    assert(rootPool.getSchedulableByName(taskSetManager1.name) === 
taskSetManager1)
    +  }
    +
    +  test("SPARK-18066: FAIR Scheduler uses default pool when 
spark.scheduler.pool property is not " +
    +    "set") {
    +    sc = new SparkContext("local", "PoolSuite")
    +    val taskScheduler = new TaskSchedulerImpl(sc)
    +
    +    val rootPool = new Pool("", SchedulingMode.FAIR, initMinShare = 0, 
initWeight = 0)
    +    val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf)
    +    schedulableBuilder.buildPools()
    +
    +    // FAIR Scheduler uses default pool when pool properties are null
    +    val taskSetManager0 = createTaskSetManager(stageId = 0, numTasks = 1, 
taskScheduler)
    +
    +    schedulableBuilder.addTaskSetManager(taskSetManager0, null)
    +
    +    val defaultPool = 
rootPool.getSchedulableByName(schedulableBuilder.DEFAULT_POOL_NAME)
    +    assert(defaultPool != null)
    +    assert(defaultPool.schedulableQueue.size == 1)
    +    assert(defaultPool.getSchedulableByName(taskSetManager0.name) === 
taskSetManager0)
    +
    +    // FAIR Scheduler uses default pool when spark.scheduler.pool property 
is not set
    +    val taskSetManager1 = createTaskSetManager(stageId = 1, numTasks = 1, 
taskScheduler)
    +
    +    schedulableBuilder.addTaskSetManager(taskSetManager1, new Properties())
    +
    +    assert(defaultPool.schedulableQueue.size == 2)
    +    assert(defaultPool.getSchedulableByName(taskSetManager1.name) === 
taskSetManager1)
    +
    +    // FAIR Scheduler uses default pool when spark.scheduler.pool property 
is set as default pool
    +    val taskSetManager2 = createTaskSetManager(stageId = 2, numTasks = 1, 
taskScheduler)
    +
    +    val properties = new Properties()
    +    properties.setProperty(schedulableBuilder.FAIR_SCHEDULER_PROPERTIES, 
schedulableBuilder
    +      .DEFAULT_POOL_NAME)
    +
    +    schedulableBuilder.addTaskSetManager(taskSetManager2, properties)
    +
    +    assert(defaultPool.schedulableQueue.size == 3)
    +    assert(defaultPool.getSchedulableByName(taskSetManager2.name) === 
taskSetManager2)
    +  }
    +
    +  test("SPARK-18066: FAIR Scheduler creates a new pool when 
spark.scheduler.pool property points " +
    +    "non-existent") {
    --- End diff --
    
    I tracked this down, and it looks like it was changed [back in 2013](https://github.com/apache/spark/commit/5892393140eb024a32585b6d5b51146ddde8f63a#diff-30ccb25ca46ef204d2446b03ae4f1117R103).
 @xiajunluan do you remember the motivation for this?
    
    I'm torn because in general I prefer failing fast (e.g., failing is better 
in the case where someone intends to use a configured pool but has a small typo 
they might not notice).  On the other hand, I'm pretty sure I've abused this 
behavior in the past to avoid configuring pools when I want a bunch of equally 
weighted pools.  If others are doing that, we shouldn't change the behavior 
(although I think that, in that case, we should add a big warning / 
future-deprecation message).


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to