Remove the new HadoopRDD constructor from SparkContext API, plus some minor style changes.
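With the broadcast-based hadoopRDD overload gone, hadoopFile remains the public entry point and takes care of broadcasting the Hadoop Configuration internally. A minimal usage sketch for reference (the path, input format, and minSplits value below are illustrative, not part of this commit):

  import org.apache.hadoop.io.{LongWritable, Text}
  import org.apache.hadoop.mapred.TextInputFormat
  import org.apache.spark.SparkContext

  // Read a Hadoop file through the public hadoopFile API; SparkContext
  // broadcasts the Hadoop Configuration and wraps the setInputPaths closure
  // in Some(...) itself, as the diff below shows.
  val sc = new SparkContext("local", "HadoopFileExample")
  val records = sc.hadoopFile[LongWritable, Text](
    "hdfs:///path/to/input",        // illustrative path
    classOf[TextInputFormat],
    classOf[LongWritable],
    classOf[Text],
    minSplits = 2)
  println(records.map(_._2.toString).count())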
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/6c32aab8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/6c32aab8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/6c32aab8

Branch: refs/heads/master
Commit: 6c32aab87d4ea26086536fb988744d0d30792160
Parents: 5a99e67
Author: Harvey Feng <har...@databricks.com>
Authored: Sat Oct 12 20:57:38 2013 -0700
Committer: Harvey Feng <har...@databricks.com>
Committed: Sat Oct 12 21:02:08 2013 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/SparkContext.scala  | 28 ++------------------
 .../scala/org/apache/spark/rdd/HadoopRDD.scala |  2 +-
 2 files changed, 3 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6c32aab8/core/src/main/scala/org/apache/spark/SparkContext.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index d9c6264..7488cdd 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -346,30 +346,6 @@ class SparkContext(
     new HadoopRDD(this, conf, inputFormatClass, keyClass, valueClass, minSplits)
   }
 
-  /**
-   * Get an RDD for a Hadoop file with an arbitray InputFormat. Accept a Hadoop Configuration
-   * that has already been broadcast and use it to construct JobConfs local to each process. These
-   * JobConfs will be initialized using an optional, user-specified closure.
-   */
-  def hadoopRDD[K, V](
-      path: String,
-      confBroadcast: Broadcast[SerializableWritable[Configuration]],
-      initLocalJobConfOpt: Option[JobConf => Unit],
-      inputFormatClass: Class[_ <: InputFormat[K, V]],
-      keyClass: Class[K],
-      valueClass: Class[V],
-      minSplits: Int
-      ): RDD[(K, V)] = {
-    new HadoopRDD(
-      this,
-      confBroadcast,
-      initLocalJobConfOpt,
-      inputFormatClass,
-      keyClass,
-      valueClass,
-      minSplits)
-  }
-
   /** Get an RDD for a Hadoop file with an arbitrary InputFormat */
   def hadoopFile[K, V](
       path: String,
@@ -380,11 +356,11 @@ class SparkContext(
       ): RDD[(K, V)] = {
     // A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it.
     val confBroadcast = broadcast(new SerializableWritable(hadoopConfiguration))
-    val setInputPathsFunc = Some((jobConf: JobConf) => FileInputFormat.setInputPaths(jobConf, path))
+    val setInputPathsFunc = (jobConf: JobConf) => FileInputFormat.setInputPaths(jobConf, path)
     new HadoopRDD(
       this,
       confBroadcast,
-      setInputPathsFunc,
+      Some(setInputPathsFunc),
       inputFormatClass,
       keyClass,
       valueClass,

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6c32aab8/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index 4ecdd65..2d394ab 100644
--- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -55,7 +55,7 @@ private[spark] class HadoopPartition(rddId: Int, idx: Int, @transient s: InputSp
  * @param broadCastedConf A general Hadoop Configuration, or a subclass of it. If the enclosed
  * variabe references an instance of JobConf, then that JobConf will be used for the Hadoop job.
  * Otherwise, a new JobConf will be created on each slave using the enclosed Configuration.
- * @param initLocalJobConfFuncOpt Optional closure used to initialize any JobCOnf that HadoopRDD
+ * @param initLocalJobConfFuncOpt Optional closure used to initialize any JobConf that HadoopRDD
  * creates.
  * @param inputFormatClass Storage format of the data to be read.
  * @param keyClass Class of the key associated with the inputFormatClass.
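For context, the broadCastedConf / initLocalJobConfFuncOpt pattern described in the scaladoc above amounts to broadcasting a wrapped Configuration once and rebuilding a JobConf from it on each executor. A rough sketch of that pattern, outside of HadoopRDD itself (the property being set is purely illustrative):

  import org.apache.hadoop.conf.Configuration
  import org.apache.hadoop.mapred.JobConf
  import org.apache.spark.{SerializableWritable, SparkContext}

  val sc = new SparkContext("local", "BroadcastConfSketch")

  // Driver side: wrap the Configuration so it can be broadcast once.
  val confBroadcast = sc.broadcast(new SerializableWritable(new Configuration()))

  // Executor side: materialize a local JobConf from the broadcast value and
  // apply the optional user-supplied initialization closure.
  val initLocalJobConfFuncOpt: Option[JobConf => Unit] =
    Some(jobConf => jobConf.set("mapred.task.timeout", "600000"))  // illustrative setting
  val localJobConf = new JobConf(confBroadcast.value.value)
  initLocalJobConfFuncOpt.foreach(f => f(localJobConf))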