Github user kayousterhout commented on a diff in the pull request:
https://github.com/apache/spark/pull/15604#discussion_r94505633
--- Diff: core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala ---
@@ -178,4 +180,97 @@ class PoolSuite extends SparkFunSuite with
LocalSparkContext {
scheduleTaskAndVerifyId(2, rootPool, 6)
scheduleTaskAndVerifyId(3, rootPool, 2)
}
+
+ test("SPARK-18066: FIFO Scheduler just uses root pool") {
+ sc = new SparkContext("local", "PoolSuite")
+ val taskScheduler = new TaskSchedulerImpl(sc)
+
+ val rootPool = new Pool("", SchedulingMode.FIFO, initMinShare = 0,
initWeight = 0)
+ val schedulableBuilder = new FIFOSchedulableBuilder(rootPool)
+
+ val taskSetManager0 = createTaskSetManager(stageId = 0, numTasks = 1,
taskScheduler)
+ val taskSetManager1 = createTaskSetManager(stageId = 1, numTasks = 1,
taskScheduler)
+
+ val properties = new Properties()
+ properties.setProperty("spark.scheduler.pool", TEST_POOL)
+
+ // FIFOSchedulableBuilder just uses rootPool so even if properties are
set, related pool
+ // (testPool) is not created and TaskSetManagers are added to rootPool
+ schedulableBuilder.addTaskSetManager(taskSetManager0, properties)
+ schedulableBuilder.addTaskSetManager(taskSetManager1, properties)
+
+ assert(rootPool.getSchedulableByName(TEST_POOL) == null)
+ assert(rootPool.schedulableQueue.size == 2)
+ assert(rootPool.getSchedulableByName(taskSetManager0.name) ===
taskSetManager0)
+ assert(rootPool.getSchedulableByName(taskSetManager1.name) ===
taskSetManager1)
+ }
+
+ test("SPARK-18066: FAIR Scheduler uses default pool when
spark.scheduler.pool property is not " +
+ "set") {
+ sc = new SparkContext("local", "PoolSuite")
+ val taskScheduler = new TaskSchedulerImpl(sc)
+
+ val rootPool = new Pool("", SchedulingMode.FAIR, initMinShare = 0,
initWeight = 0)
+ val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf)
+ schedulableBuilder.buildPools()
+
+ // FAIR Scheduler uses default pool when pool properties are null
+ val taskSetManager0 = createTaskSetManager(stageId = 0, numTasks = 1,
taskScheduler)
+
+ schedulableBuilder.addTaskSetManager(taskSetManager0, null)
+
+ val defaultPool =
rootPool.getSchedulableByName(schedulableBuilder.DEFAULT_POOL_NAME)
+ assert(defaultPool != null)
+ assert(defaultPool.schedulableQueue.size == 1)
+ assert(defaultPool.getSchedulableByName(taskSetManager0.name) ===
taskSetManager0)
+
+ // FAIR Scheduler uses default pool when spark.scheduler.pool property
is not set
+ val taskSetManager1 = createTaskSetManager(stageId = 1, numTasks = 1,
taskScheduler)
+
+ schedulableBuilder.addTaskSetManager(taskSetManager1, new Properties())
+
+ assert(defaultPool.schedulableQueue.size == 2)
+ assert(defaultPool.getSchedulableByName(taskSetManager1.name) ===
taskSetManager1)
+
+ // FAIR Scheduler uses default pool when spark.scheduler.pool property
is set as default pool
+ val taskSetManager2 = createTaskSetManager(stageId = 2, numTasks = 1,
taskScheduler)
+
+ val properties = new Properties()
+ properties.setProperty(schedulableBuilder.FAIR_SCHEDULER_PROPERTIES,
schedulableBuilder
+ .DEFAULT_POOL_NAME)
+
+ schedulableBuilder.addTaskSetManager(taskSetManager2, properties)
+
+ assert(defaultPool.schedulableQueue.size == 3)
+ assert(defaultPool.getSchedulableByName(taskSetManager2.name) ===
taskSetManager2)
+ }
+
+ test("SPARK-18066: FAIR Scheduler creates a new pool when
spark.scheduler.pool property points " +
+ "non-existent") {
+ sc = new SparkContext("local", "PoolSuite")
+ val taskScheduler = new TaskSchedulerImpl(sc)
+
+ val rootPool = new Pool("", SchedulingMode.FAIR, initMinShare = 0,
initWeight = 0)
+ val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf)
+ schedulableBuilder.buildPools()
+
+ assert(rootPool.getSchedulableByName(TEST_POOL) == null)
+
+ val taskSetManager = createTaskSetManager(stageId = 0, numTasks = 1,
taskScheduler)
+
+ val properties = new Properties()
+ properties.setProperty(schedulableBuilder.FAIR_SCHEDULER_PROPERTIES,
TEST_POOL)
+
+ // FAIR Scheduler creates a new pool with default values when
spark.scheduler.pool property
+ // points non-existent pool. This can be happened when scheduler
allocation file is not set or
+ // it does not contain related pool
--- End diff --
Can you clean up this comment a bit too?
"The fair scheduler should create a new pool with default values when
spark.scheduler.pool points to a pool that doesn't exist yet (this can happen
when the file that pools are read from isn't set, or when that file doesn't
contain the pool name specified by spark.scheduler.pool)."
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]