Repository: spark
Updated Branches:
  refs/heads/master 29cecd4a4 -> 0a38637d0


[SPARK-11807] Remove support for Hadoop < 2.2

i.e. Hadoop 1 and Hadoop 2.0

Author: Reynold Xin <r...@databricks.com>

Closes #10404 from rxin/SPARK-11807.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0a38637d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0a38637d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0a38637d

Branch: refs/heads/master
Commit: 0a38637d05d2338503ecceacfb911a6da6d49538
Parents: 29cecd4
Author: Reynold Xin <r...@databricks.com>
Authored: Mon Dec 21 22:15:52 2015 -0800
Committer: Reynold Xin <r...@databricks.com>
Committed: Mon Dec 21 22:15:52 2015 -0800

----------------------------------------------------------------------
 .../spark/deploy/history/FsHistoryProvider.scala  | 10 +---------
 .../mapreduce/SparkHadoopMapReduceUtil.scala      | 17 ++---------------
 dev/create-release/release-build.sh               |  3 ---
 dev/run-tests-jenkins.py                          |  4 ----
 dev/run-tests.py                                  |  2 --
 docs/building-spark.md                            | 18 ++++--------------
 make-distribution.sh                              |  2 +-
 pom.xml                                           | 13 -------------
 sql/README.md                                     |  2 +-
 9 files changed, 9 insertions(+), 62 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0a38637d/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index 718efc4..6e91d73 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -663,16 +663,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
 
   // For testing.
   private[history] def isFsInSafeMode(dfs: DistributedFileSystem): Boolean = {
-    val hadoop1Class = "org.apache.hadoop.hdfs.protocol.FSConstants$SafeModeAction"
     val hadoop2Class = "org.apache.hadoop.hdfs.protocol.HdfsConstants$SafeModeAction"
-    val actionClass: Class[_] =
-      try {
-        getClass().getClassLoader().loadClass(hadoop2Class)
-      } catch {
-        case _: ClassNotFoundException =>
-          getClass().getClassLoader().loadClass(hadoop1Class)
-      }
-
+    val actionClass: Class[_] = getClass().getClassLoader().loadClass(hadoop2Class)
     val action = actionClass.getField("SAFEMODE_GET").get(null)
     val method = dfs.getClass().getMethod("setSafeMode", action.getClass())
     method.invoke(dfs, action).asInstanceOf[Boolean]
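
For reference, the Hadoop 2-only probe that survives this hunk boils down to the following minimal sketch (a standalone rewrite of the method above, wrapped in a hypothetical object, not the exact file contents; it assumes a live DistributedFileSystem handle):

    import org.apache.hadoop.hdfs.DistributedFileSystem

    object SafeModeCheck {
      // Load the Hadoop 2 SafeModeAction enum reflectively, then invoke
      // setSafeMode(SAFEMODE_GET), which returns true while the NameNode
      // is in safe mode. The Hadoop 1 FSConstants fallback is gone.
      def isFsInSafeMode(dfs: DistributedFileSystem): Boolean = {
        val hadoop2Class = "org.apache.hadoop.hdfs.protocol.HdfsConstants$SafeModeAction"
        val actionClass: Class[_] = getClass.getClassLoader.loadClass(hadoop2Class)
        val action = actionClass.getField("SAFEMODE_GET").get(null)
        val method = dfs.getClass.getMethod("setSafeMode", action.getClass)
        method.invoke(dfs, action).asInstanceOf[Boolean]
      }
    }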

http://git-wip-us.apache.org/repos/asf/spark/blob/0a38637d/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
index 943ebcb..82d807f 100644
--- a/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
+++ b/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
@@ -26,17 +26,13 @@ import org.apache.spark.util.Utils
 private[spark]
 trait SparkHadoopMapReduceUtil {
   def newJobContext(conf: Configuration, jobId: JobID): JobContext = {
-    val klass = firstAvailableClass(
-        "org.apache.hadoop.mapreduce.task.JobContextImpl",  // hadoop2, hadoop2-yarn
-        "org.apache.hadoop.mapreduce.JobContext")           // hadoop1
+    val klass = Utils.classForName("org.apache.hadoop.mapreduce.task.JobContextImpl")
     val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID])
     ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
   }
 
   def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = {
-    val klass = firstAvailableClass(
-        "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl",  // hadoop2, hadoop2-yarn
-        "org.apache.hadoop.mapreduce.TaskAttemptContext")           // hadoop1
+    val klass = Utils.classForName("org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl")
     val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID])
     ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext]
   }
@@ -69,13 +65,4 @@ trait SparkHadoopMapReduceUtil {
       }
     }
   }
-
-  private def firstAvailableClass(first: String, second: String): Class[_] = {
-    try {
-      Utils.classForName(first)
-    } catch {
-      case e: ClassNotFoundException =>
-        Utils.classForName(second)
-    }
-  }
 }
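
With the Hadoop 1 fallback removed, both factory methods reduce to direct reflective construction of the Hadoop 2 *Impl classes. A minimal self-contained sketch of the resulting shape, assembled from the hunks above, using plain Class.forName in place of Spark's Utils.classForName helper and a hypothetical wrapper object:

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.mapreduce.{JobContext, JobID}

    object MapReduceUtilSketch {
      // Directly load the Hadoop 2 implementation class and call its
      // (Configuration, JobID) constructor reflectively; there is no longer
      // a ClassNotFoundException fallback to the Hadoop 1 class name.
      def newJobContext(conf: Configuration, jobId: JobID): JobContext = {
        val klass = Class.forName("org.apache.hadoop.mapreduce.task.JobContextImpl")
        val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID])
        ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
      }
    }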

http://git-wip-us.apache.org/repos/asf/spark/blob/0a38637d/dev/create-release/release-build.sh
----------------------------------------------------------------------
diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index cb79e9e..b1895b1 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -166,9 +166,6 @@ if [[ "$1" == "package" ]]; then
 
  # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds
  # share the same Zinc server.
-  make_binary_release "hadoop1" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver" "3030" &
-  make_binary_release "hadoop1-scala2.11" "-Psparkr -Phadoop-1 -Phive -Dscala-2.11" "3031" &
-  make_binary_release "cdh4" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
  make_binary_release "hadoop2.3" "-Psparkr -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" &
  make_binary_release "hadoop2.4" "-Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" &
  make_binary_release "hadoop2.6" "-Psparkr -Phadoop-2.6 -Phive -Phive-thriftserver -Pyarn" "3034" &

http://git-wip-us.apache.org/repos/asf/spark/blob/0a38637d/dev/run-tests-jenkins.py
----------------------------------------------------------------------
diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index 7aecea2..42afca0 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -163,10 +163,6 @@ def main():
     if "test-maven" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_TOOL"] = "maven"
     # Switch the Hadoop profile based on the PR title:
-    if "test-hadoop1.0" in ghprb_pull_title:
-        os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop1.0"
-    if "test-hadoop2.0" in ghprb_pull_title:
-        os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.0"
     if "test-hadoop2.2" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.2"
     if "test-hadoop2.3" in ghprb_pull_title:

http://git-wip-us.apache.org/repos/asf/spark/blob/0a38637d/dev/run-tests.py
----------------------------------------------------------------------
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 2d4e04c..17ceba0 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -301,8 +301,6 @@ def get_hadoop_profiles(hadoop_version):
     """
 
     sbt_maven_hadoop_profiles = {
-        "hadoop1.0": ["-Phadoop-1", "-Dhadoop.version=1.2.1"],
-        "hadoop2.0": ["-Phadoop-1", "-Dhadoop.version=2.0.0-mr1-cdh4.1.1"],
         "hadoop2.2": ["-Pyarn", "-Phadoop-2.2"],
         "hadoop2.3": ["-Pyarn", "-Phadoop-2.3", "-Dhadoop.version=2.3.0"],
         "hadoop2.6": ["-Pyarn", "-Phadoop-2.6"],

http://git-wip-us.apache.org/repos/asf/spark/blob/0a38637d/docs/building-spark.md
----------------------------------------------------------------------
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 3d38edb..7859889 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -33,13 +33,13 @@ to the `sharedSettings` val. See also [this PR](https://github.com/apache/spark/
 
 # Building a Runnable Distribution
 
-To create a Spark distribution like those distributed by the 
-[Spark Downloads](http://spark.apache.org/downloads.html) page, and that is laid out so as 
-to be runnable, use `make-distribution.sh` in the project root directory. It can be configured 
+To create a Spark distribution like those distributed by the
+[Spark Downloads](http://spark.apache.org/downloads.html) page, and that is laid out so as
+to be runnable, use `make-distribution.sh` in the project root directory. It can be configured
 with Maven profile settings and so on like the direct Maven build. Example:
 
     ./make-distribution.sh --name custom-spark --tgz -Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn
-    
+
 For more information on usage, run `./make-distribution.sh --help`
 
 # Setting up Maven's Memory Usage
@@ -74,7 +74,6 @@ Because HDFS is not protocol-compatible across versions, if you want to read fro
     <tr><th>Hadoop version</th><th>Profile required</th></tr>
   </thead>
   <tbody>
-    <tr><td>1.x to 2.1.x</td><td>hadoop-1</td></tr>
     <tr><td>2.2.x</td><td>hadoop-2.2</td></tr>
     <tr><td>2.3.x</td><td>hadoop-2.3</td></tr>
     <tr><td>2.4.x</td><td>hadoop-2.4</td></tr>
@@ -82,15 +81,6 @@ Because HDFS is not protocol-compatible across versions, if you want to read fro
   </tbody>
 </table>
 
-For Apache Hadoop versions 1.x, Cloudera CDH "mr1" distributions, and other Hadoop versions without YARN, use:
-
-{% highlight bash %}
-# Apache Hadoop 1.2.1
-mvn -Dhadoop.version=1.2.1 -Phadoop-1 -DskipTests clean package
-
-# Cloudera CDH 4.2.0 with MapReduce v1
-mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -Phadoop-1 -DskipTests clean package
-{% endhighlight %}
 
 You can enable the `yarn` profile and optionally set the `yarn.version` property if it is different from `hadoop.version`. Spark only supports YARN versions 2.2.0 and later.
 

http://git-wip-us.apache.org/repos/asf/spark/blob/0a38637d/make-distribution.sh
----------------------------------------------------------------------
diff --git a/make-distribution.sh b/make-distribution.sh
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -58,7 +58,7 @@ while (( "$#" )); do
     --hadoop)
       echo "Error: '--hadoop' is no longer supported:"
       echo "Error: use Maven profiles and options -Dhadoop.version and -Dyarn.version instead."
-      echo "Error: Related profiles include hadoop-1, hadoop-2.2, hadoop-2.3 and hadoop-2.4."
+      echo "Error: Related profiles include hadoop-2.2, hadoop-2.3 and hadoop-2.4."
       exit_with_usage
       ;;
     --with-yarn)

http://git-wip-us.apache.org/repos/asf/spark/blob/0a38637d/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 32918d6..284c219 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2443,19 +2443,6 @@
     -->
 
     <profile>
-      <id>hadoop-1</id>
-      <properties>
-        <hadoop.version>1.2.1</hadoop.version>
-        <protobuf.version>2.4.1</protobuf.version>
-        <hbase.version>0.98.7-hadoop1</hbase.version>
-        <avro.mapred.classifier>hadoop1</avro.mapred.classifier>
-        <codehaus.jackson.version>1.8.8</codehaus.jackson.version>
-        <akka.group>org.spark-project.akka</akka.group>
-        <akka.version>2.3.4-spark</akka.version>
-      </properties>
-    </profile>
-
-    <profile>
       <id>hadoop-2.2</id>
     <!-- SPARK-7249: Default hadoop profile. Uses global properties. -->
     </profile>

http://git-wip-us.apache.org/repos/asf/spark/blob/0a38637d/sql/README.md
----------------------------------------------------------------------
diff --git a/sql/README.md b/sql/README.md
index 63d4dac..a13bdab 100644
--- a/sql/README.md
+++ b/sql/README.md
@@ -20,7 +20,7 @@ If you are working with Hive 0.12.0, you will need to set several environmental
 ```
 export HIVE_HOME="<path to>/hive/build/dist"
 export HIVE_DEV_HOME="<path to>/hive/"
-export HADOOP_HOME="<path to>/hadoop-1.0.4"
+export HADOOP_HOME="<path to>/hadoop"
 ```
 
 If you are working with Hive 0.13.1, the following steps are needed:

