Repository: spark
Updated Branches:
  refs/heads/branch-2.0 c86d29b2e -> 5c9555e11


[SPARK-16148][SCHEDULER] Allow for underscores in TaskLocation in the Executor ID

## What changes were proposed in this pull request?

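Previously, the TaskLocation implementation would not allow for executor ids 
which include underscores, because it split the whole location string on every 
underscore and required exactly three parts.  This change strips the 
"executor_" prefix and splits the remainder at the first underscore only, so 
everything after the hostname is treated as the executor id.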

This addresses the JIRA found here: 
https://issues.apache.org/jira/browse/SPARK-16148

This is moved over from a previous PR against branch-1.6: 
https://github.com/apache/spark/pull/13857

## How was this patch tested?

Ran existing unit tests for core and streaming.  Manually ran a simple 
streaming job with an executor whose id contained underscores and confirmed 
that the job ran successfully.

This is my original work and I license the work to the project under the 
project's open source license.

Author: Tom Magrino <tmagr...@fb.com>

Closes #13858 from tmagrino/fixtasklocation.

(cherry picked from commit ae14f362355b131fcb3e3633da7bb14bdd2b6893)
Signed-off-by: Shixiong Zhu <shixi...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5c9555e1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5c9555e1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5c9555e1

Branch: refs/heads/branch-2.0
Commit: 5c9555e1115ce52954db2a1b18f78cd77ec8c15f
Parents: c86d29b
Author: Tom Magrino <tmagr...@fb.com>
Authored: Tue Jun 28 13:36:41 2016 -0700
Committer: Shixiong Zhu <shixi...@databricks.com>
Committed: Tue Jun 28 13:38:27 2016 -0700

----------------------------------------------------------------------
 .../org/apache/spark/scheduler/TaskLocation.scala     | 14 +++++++-------
 .../apache/spark/scheduler/TaskSetManagerSuite.scala  |  2 ++
 2 files changed, 9 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5c9555e1/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala
index 1eb6c16..06b5293 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala
@@ -64,18 +64,18 @@ private[spark] object TaskLocation {
 
   /**
    * Create a TaskLocation from a string returned by getPreferredLocations.
-   * These strings have the form [hostname] or hdfs_cache_[hostname], depending on whether the
-   * location is cached.
+   * These strings have the form executor_[hostname]_[executorid], [hostname], or
+   * hdfs_cache_[hostname], depending on whether the location is cached.
    */
   def apply(str: String): TaskLocation = {
     val hstr = str.stripPrefix(inMemoryLocationTag)
     if (hstr.equals(str)) {
       if (str.startsWith(executorLocationTag)) {
-        val splits = str.split("_")
-        if (splits.length != 3) {
-          throw new IllegalArgumentException("Illegal executor location format: " + str)
-        }
-        new ExecutorCacheTaskLocation(splits(1), splits(2))
+        val hostAndExecutorId = str.stripPrefix(executorLocationTag)
+        val splits = hostAndExecutorId.split("_", 2)
+        require(splits.length == 2, "Illegal executor location format: " + str)
+        val Array(host, executorId) = splits
+        new ExecutorCacheTaskLocation(host, executorId)
       } else {
         new HostTaskLocation(str)
       }

http://git-wip-us.apache.org/repos/asf/spark/blob/5c9555e1/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
index 9b7b945..8623133 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
@@ -787,6 +787,8 @@ class TaskSetManagerSuite extends SparkFunSuite with LocalSparkContext with Logg
     assert(TaskLocation("host1") === HostTaskLocation("host1"))
     assert(TaskLocation("hdfs_cache_host1") === HDFSCacheTaskLocation("host1"))
     assert(TaskLocation("executor_host1_3") === ExecutorCacheTaskLocation("host1", "3"))
+    assert(TaskLocation("executor_some.host1_executor_task_3") ===
+      ExecutorCacheTaskLocation("some.host1", "executor_task_3"))
   }
 
   private def createTaskResult(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to