Github user squito commented on a diff in the pull request:
https://github.com/apache/spark/pull/15604#discussion_r94499385
--- Diff: core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala ---
@@ -178,4 +180,97 @@ class PoolSuite extends SparkFunSuite with
LocalSparkContext {
scheduleTaskAndVerifyId(2, rootPool, 6)
scheduleTaskAndVerifyId(3, rootPool, 2)
}
+
+ test("SPARK-18066: FIFO Scheduler just uses root pool") {
+ sc = new SparkContext("local", "PoolSuite")
+ val taskScheduler = new TaskSchedulerImpl(sc)
+
+ val rootPool = new Pool("", SchedulingMode.FIFO, initMinShare = 0,
initWeight = 0)
+ val schedulableBuilder = new FIFOSchedulableBuilder(rootPool)
+
+ val taskSetManager0 = createTaskSetManager(stageId = 0, numTasks = 1,
taskScheduler)
+ val taskSetManager1 = createTaskSetManager(stageId = 1, numTasks = 1,
taskScheduler)
+
+ val properties = new Properties()
+ properties.setProperty("spark.scheduler.pool", TEST_POOL)
+
+ // FIFOSchedulableBuilder just uses rootPool so even if properties are
set, related pool
+ // (testPool) is not created and TaskSetManagers are added to rootPool
+ schedulableBuilder.addTaskSetManager(taskSetManager0, properties)
+ schedulableBuilder.addTaskSetManager(taskSetManager1, properties)
+
+ assert(rootPool.getSchedulableByName(TEST_POOL) == null)
+ assert(rootPool.schedulableQueue.size == 2)
+ assert(rootPool.getSchedulableByName(taskSetManager0.name) ===
taskSetManager0)
+ assert(rootPool.getSchedulableByName(taskSetManager1.name) ===
taskSetManager1)
+ }
+
+ test("SPARK-18066: FAIR Scheduler uses default pool when
spark.scheduler.pool property is not " +
+ "set") {
+ sc = new SparkContext("local", "PoolSuite")
+ val taskScheduler = new TaskSchedulerImpl(sc)
+
+ val rootPool = new Pool("", SchedulingMode.FAIR, initMinShare = 0,
initWeight = 0)
+ val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf)
+ schedulableBuilder.buildPools()
+
+ // FAIR Scheduler uses default pool when pool properties are null
+ val taskSetManager0 = createTaskSetManager(stageId = 0, numTasks = 1,
taskScheduler)
+
+ schedulableBuilder.addTaskSetManager(taskSetManager0, null)
+
+ val defaultPool =
rootPool.getSchedulableByName(schedulableBuilder.DEFAULT_POOL_NAME)
+ assert(defaultPool != null)
+ assert(defaultPool.schedulableQueue.size == 1)
+ assert(defaultPool.getSchedulableByName(taskSetManager0.name) ===
taskSetManager0)
+
+ // FAIR Scheduler uses default pool when spark.scheduler.pool property
is not set
+ val taskSetManager1 = createTaskSetManager(stageId = 1, numTasks = 1,
taskScheduler)
+
+ schedulableBuilder.addTaskSetManager(taskSetManager1, new Properties())
+
+ assert(defaultPool.schedulableQueue.size == 2)
+ assert(defaultPool.getSchedulableByName(taskSetManager1.name) ===
taskSetManager1)
+
+ // FAIR Scheduler uses default pool when spark.scheduler.pool property
is set as default pool
+ val taskSetManager2 = createTaskSetManager(stageId = 2, numTasks = 1,
taskScheduler)
+
+ val properties = new Properties()
+ properties.setProperty(schedulableBuilder.FAIR_SCHEDULER_PROPERTIES,
schedulableBuilder
+ .DEFAULT_POOL_NAME)
+
+ schedulableBuilder.addTaskSetManager(taskSetManager2, properties)
+
+ assert(defaultPool.schedulableQueue.size == 3)
+ assert(defaultPool.getSchedulableByName(taskSetManager2.name) ===
taskSetManager2)
+ }
+
+ test("SPARK-18066: FAIR Scheduler creates a new pool when
spark.scheduler.pool property points " +
+ "non-existent") {
--- End diff --
minor nit: rename to "FAIR Scheduler creates a new pool when
spark.scheduler.pool property points to a non-existent pool"
bigger question:
@markhamstra @kayousterhout Is this really the desired behavior? Or is
there a bug -- should it fail fast? It looks like [this behavior was
intentional](https://github.com/apache/spark/blob/b67b35f76b684c5176dc683e7491fd01b43f4467/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala#L143-L145).
Then again, that comment makes it sound like the user is going to directly
modify the `weight` and `minShare` of the constructed pool, but even though [they
are
`var`s](https://github.com/apache/spark/blob/b67b35f76b684c5176dc683e7491fd01b43f4467/core/src/main/scala/org/apache/spark/scheduler/Pool.scala#L40-L41),
it's `private [spark]` and there isn't anything else exposing it to the user.
I prefer fail-fast behavior, but it seems like fair scheduler configuration
prefers to assume defaults when there is misconfiguration. If you think this
is the right behavior, then there is probably some minor cleanup to do in Pool &
SchedulableBuilder to clarify.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]