Repository: spark Updated Branches: refs/heads/master 3eb009f36 -> 3d3acef04
SPARK-1187, Added missing Python APIs The following Python APIs are added, RDD.id() SparkContext.setJobGroup() SparkContext.setLocalProperty() SparkContext.getLocalProperty() SparkContext.sparkUser() was raised earlier as a part of apache/incubator-spark#486 Author: Prabin Banka <prabin.ba...@imaginea.com> Closes #75 from prabinb/python-api-backup and squashes the following commits: cc3c6cd [Prabin Banka] Added missing Python APIs Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d3acef0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d3acef0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d3acef0 Branch: refs/heads/master Commit: 3d3acef0474b6dc21f1b470ea96079a491e58b75 Parents: 3eb009f Author: Prabin Banka <prabin.ba...@imaginea.com> Authored: Thu Mar 6 12:45:27 2014 -0800 Committer: Patrick Wendell <pwend...@gmail.com> Committed: Thu Mar 6 12:45:27 2014 -0800 ---------------------------------------------------------------------- python/pyspark/context.py | 31 +++++++++++++++++++++++++++++++ python/pyspark/rdd.py | 7 +++++++ 2 files changed, 38 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/3d3acef0/python/pyspark/context.py ---------------------------------------------------------------------- diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 93faa2e..c9f42d3 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -372,6 +372,37 @@ class SparkContext(object): return newStorageLevel(storageLevel.useDisk, storageLevel.useMemory, storageLevel.deserialized, storageLevel.replication) + def setJobGroup(self, groupId, description): + """ + Assigns a group ID to all the jobs started by this thread until the group ID is set to a + different value or cleared. + + Often, a unit of execution in an application consists of multiple Spark actions or jobs. + Application programmers can use this method to group all those jobs together and give a + group description. Once set, the Spark web UI will associate such jobs with this group. + """ + self._jsc.setJobGroup(groupId, description) + + def setLocalProperty(self, key, value): + """ + Set a local property that affects jobs submitted from this thread, such as the + Spark fair scheduler pool. + """ + self._jsc.setLocalProperty(key, value) + + def getLocalProperty(self, key): + """ + Get a local property set in this thread, or null if it is missing. See + L{setLocalProperty} + """ + return self._jsc.getLocalProperty(key) + + def sparkUser(self): + """ + Get SPARK_USER for user who is running SparkContext. + """ + return self._jsc.sc().sparkUser() + def _test(): import atexit import doctest http://git-wip-us.apache.org/repos/asf/spark/blob/3d3acef0/python/pyspark/rdd.py ---------------------------------------------------------------------- diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index be23f87..097a0a2 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -95,6 +95,13 @@ class RDD(object): self.is_checkpointed = False self.ctx = ctx self._jrdd_deserializer = jrdd_deserializer + self._id = jrdd.id() + + def id(self): + """ + A unique ID for this RDD (within its SparkContext). + """ + return self._id def __repr__(self): return self._jrdd.toString()