Merge branch 'master' of https://github.com/apache/incubator-spark into sbt-assembly-deps
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/051cd960 Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/051cd960 Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/051cd960 Branch: refs/heads/master Commit: 051cd960d9974ca7eb1ad299294cd5137ad1fa95 Parents: c441904 678dec6 Author: Shivaram Venkataraman <shiva...@eecs.berkeley.edu> Authored: Tue Oct 15 13:26:40 2013 -0700 Committer: Shivaram Venkataraman <shiva...@eecs.berkeley.edu> Committed: Tue Oct 15 13:26:40 2013 -0700 ---------------------------------------------------------------------- assembly/README | 11 +- assembly/pom.xml | 24 +- bagel/pom.xml | 10 +- bin/stop-slaves.sh | 2 - conf/fairscheduler.xml.template | 18 +- conf/metrics.properties.template | 50 ++- core/pom.xml | 28 +- .../spark/network/netty/FileClientHandler.java | 3 +- .../spark/network/netty/FileServerHandler.java | 6 +- .../scala/org/apache/spark/Aggregator.scala | 49 ++- .../apache/spark/BlockStoreShuffleFetcher.scala | 23 +- .../scala/org/apache/spark/CacheManager.scala | 35 +- .../scala/org/apache/spark/FutureAction.scala | 250 +++++++++++ .../apache/spark/InterruptibleIterator.scala | 30 ++ .../org/apache/spark/MapOutputTracker.scala | 4 +- .../scala/org/apache/spark/ShuffleFetcher.scala | 5 +- .../scala/org/apache/spark/SparkContext.scala | 137 ++++-- .../main/scala/org/apache/spark/SparkEnv.scala | 2 +- .../scala/org/apache/spark/TaskContext.scala | 24 +- .../scala/org/apache/spark/TaskEndReason.scala | 10 +- .../org/apache/spark/api/java/JavaRDDLike.scala | 10 + .../spark/api/python/PythonPartitioner.scala | 10 +- .../org/apache/spark/api/python/PythonRDD.scala | 6 +- .../spark/broadcast/BitTorrentBroadcast.scala | 4 +- .../apache/spark/broadcast/HttpBroadcast.scala | 13 +- .../apache/spark/broadcast/TreeBroadcast.scala | 8 +- .../org/apache/spark/deploy/DeployMessage.scala | 25 +- .../spark/deploy/ExecutorDescription.scala | 34 ++ .../spark/deploy/FaultToleranceTest.scala | 420 +++++++++++++++++++ .../org/apache/spark/deploy/JsonProtocol.scala | 10 +- .../apache/spark/deploy/LocalSparkCluster.scala | 9 +- .../apache/spark/deploy/SparkHadoopUtil.scala | 12 +- .../org/apache/spark/deploy/client/Client.scala | 84 +++- .../spark/deploy/client/ClientListener.scala | 4 + .../apache/spark/deploy/client/TestClient.scala | 7 +- .../spark/deploy/master/ApplicationInfo.scala | 53 ++- .../spark/deploy/master/ApplicationSource.scala | 2 +- .../spark/deploy/master/ApplicationState.scala | 4 +- .../spark/deploy/master/ExecutorInfo.scala | 7 +- .../master/FileSystemPersistenceEngine.scala | 90 ++++ .../deploy/master/LeaderElectionAgent.scala | 45 ++ .../org/apache/spark/deploy/master/Master.scala | 245 +++++++++-- .../spark/deploy/master/MasterMessages.scala | 46 ++ .../spark/deploy/master/MasterSource.scala | 6 +- .../spark/deploy/master/PersistenceEngine.scala | 53 +++ .../spark/deploy/master/RecoveryState.scala | 26 ++ .../deploy/master/SparkZooKeeperSession.scala | 203 +++++++++ .../apache/spark/deploy/master/WorkerInfo.scala | 42 +- .../spark/deploy/master/WorkerState.scala | 6 +- .../master/ZooKeeperLeaderElectionAgent.scala | 136 ++++++ .../master/ZooKeeperPersistenceEngine.scala | 85 ++++ .../spark/deploy/worker/ExecutorRunner.scala | 13 +- .../org/apache/spark/deploy/worker/Worker.scala | 175 +++++--- .../spark/deploy/worker/WorkerArguments.scala | 8 +- .../spark/deploy/worker/WorkerSource.scala | 10 +- .../spark/deploy/worker/ui/WorkerWebUI.scala | 2 +- .../org/apache/spark/executor/Executor.scala | 146 +++++-- .../apache/spark/executor/ExecutorSource.scala | 23 +- .../spark/executor/MesosExecutorBackend.scala | 18 +- .../executor/StandaloneExecutorBackend.scala | 10 +- .../apache/spark/metrics/MetricsConfig.scala | 7 +- .../apache/spark/metrics/sink/GangliaSink.scala | 82 ++++ .../spark/metrics/sink/MetricsServlet.scala | 11 +- .../apache/spark/network/netty/FileHeader.scala | 22 +- .../spark/network/netty/ShuffleCopier.scala | 27 +- .../spark/network/netty/ShuffleSender.scala | 11 +- .../org/apache/spark/rdd/AsyncRDDActions.scala | 122 ++++++ .../scala/org/apache/spark/rdd/BlockRDD.scala | 6 +- .../org/apache/spark/rdd/CheckpointRDD.scala | 2 +- .../org/apache/spark/rdd/CoGroupedRDD.scala | 26 +- .../scala/org/apache/spark/rdd/HadoopRDD.scala | 174 +++++--- .../spark/rdd/MapPartitionsWithContextRDD.scala | 41 ++ .../spark/rdd/MapPartitionsWithIndexRDD.scala | 41 -- .../org/apache/spark/rdd/NewHadoopRDD.scala | 79 ++-- .../org/apache/spark/rdd/PairRDDFunctions.scala | 16 +- .../spark/rdd/ParallelCollectionRDD.scala | 5 +- .../main/scala/org/apache/spark/rdd/RDD.scala | 150 ++++--- .../org/apache/spark/rdd/ShuffledRDD.scala | 2 +- .../org/apache/spark/rdd/SubtractedRDD.scala | 2 +- .../apache/spark/scheduler/DAGScheduler.scala | 146 ++++--- .../spark/scheduler/DAGSchedulerEvent.scala | 25 +- .../spark/scheduler/DAGSchedulerSource.scala | 18 +- .../org/apache/spark/scheduler/JobLogger.scala | 1 - .../org/apache/spark/scheduler/JobWaiter.scala | 62 +-- .../scala/org/apache/spark/scheduler/Pool.scala | 124 ++++++ .../org/apache/spark/scheduler/ResultTask.scala | 52 ++- .../apache/spark/scheduler/Schedulable.scala | 46 ++ .../spark/scheduler/SchedulableBuilder.scala | 153 +++++++ .../spark/scheduler/SchedulingAlgorithm.scala | 81 ++++ .../apache/spark/scheduler/SchedulingMode.scala | 29 ++ .../apache/spark/scheduler/ShuffleMapTask.scala | 50 ++- .../apache/spark/scheduler/SparkListener.scala | 3 +- .../spark/scheduler/SparkListenerBus.scala | 18 + .../org/apache/spark/scheduler/StageInfo.scala | 2 +- .../scala/org/apache/spark/scheduler/Task.scala | 63 ++- .../spark/scheduler/TaskDescription.scala | 37 ++ .../org/apache/spark/scheduler/TaskInfo.scala | 72 ++++ .../apache/spark/scheduler/TaskLocality.scala | 32 ++ .../org/apache/spark/scheduler/TaskResult.scala | 16 +- .../apache/spark/scheduler/TaskScheduler.scala | 8 +- .../spark/scheduler/TaskSchedulerListener.scala | 1 - .../org/apache/spark/scheduler/TaskSet.scala | 4 + .../apache/spark/scheduler/TaskSetManager.scala | 48 +++ .../scheduler/cluster/ClusterScheduler.scala | 94 +++-- .../cluster/ClusterTaskSetManager.scala | 238 +++++------ .../apache/spark/scheduler/cluster/Pool.scala | 121 ------ .../spark/scheduler/cluster/Schedulable.scala | 48 --- .../scheduler/cluster/SchedulableBuilder.scala | 137 ------ .../scheduler/cluster/SchedulerBackend.scala | 6 +- .../scheduler/cluster/SchedulingAlgorithm.scala | 81 ---- .../scheduler/cluster/SchedulingMode.scala | 29 -- .../cluster/SparkDeploySchedulerBackend.scala | 26 +- .../cluster/StandaloneClusterMessage.scala | 3 + .../cluster/StandaloneSchedulerBackend.scala | 8 + .../scheduler/cluster/TaskDescription.scala | 37 -- .../spark/scheduler/cluster/TaskInfo.scala | 72 ---- .../spark/scheduler/cluster/TaskLocality.scala | 32 -- .../scheduler/cluster/TaskResultGetter.scala | 124 ++++++ .../scheduler/cluster/TaskSetManager.scala | 51 --- .../mesos/CoarseMesosSchedulerBackend.scala | 286 +++++++++++++ .../cluster/mesos/MesosSchedulerBackend.scala | 345 +++++++++++++++ .../spark/scheduler/local/LocalScheduler.scala | 196 ++++----- .../scheduler/local/LocalTaskSetManager.scala | 32 +- .../mesos/CoarseMesosSchedulerBackend.scala | 286 ------------- .../scheduler/mesos/MesosSchedulerBackend.scala | 343 --------------- .../spark/serializer/KryoSerializer.scala | 30 +- .../apache/spark/storage/BlockException.scala | 2 +- .../spark/storage/BlockFetcherIterator.scala | 24 +- .../org/apache/spark/storage/BlockId.scala | 96 +++++ .../org/apache/spark/storage/BlockManager.scala | 155 ++++--- .../spark/storage/BlockManagerMaster.scala | 8 +- .../spark/storage/BlockManagerMasterActor.scala | 21 +- .../spark/storage/BlockManagerMessages.scala | 16 +- .../spark/storage/BlockManagerSource.scala | 14 +- .../spark/storage/BlockManagerWorker.scala | 4 +- .../org/apache/spark/storage/BlockMessage.scala | 38 +- .../spark/storage/BlockMessageArray.scala | 7 +- .../spark/storage/BlockObjectWriter.scala | 2 +- .../org/apache/spark/storage/BlockStore.scala | 14 +- .../org/apache/spark/storage/DiskStore.scala | 39 +- .../org/apache/spark/storage/MemoryStore.scala | 48 +-- .../spark/storage/ShuffleBlockManager.scala | 16 +- .../org/apache/spark/storage/StorageLevel.scala | 6 +- .../org/apache/spark/storage/StorageUtils.scala | 47 +-- .../apache/spark/storage/ThreadingTest.scala | 6 +- .../scala/org/apache/spark/ui/SparkUI.scala | 7 +- .../scala/org/apache/spark/ui/UIUtils.scala | 39 +- .../apache/spark/ui/UIWorkloadGenerator.scala | 8 +- .../org/apache/spark/ui/exec/ExecutorsUI.scala | 2 +- .../org/apache/spark/ui/jobs/IndexPage.scala | 2 +- .../spark/ui/jobs/JobProgressListener.scala | 6 +- .../apache/spark/ui/jobs/JobProgressUI.scala | 4 +- .../org/apache/spark/ui/jobs/PoolTable.scala | 6 +- .../org/apache/spark/ui/jobs/StagePage.scala | 6 +- .../org/apache/spark/ui/jobs/StageTable.scala | 10 +- .../org/apache/spark/ui/storage/IndexPage.scala | 2 +- .../org/apache/spark/ui/storage/RDDPage.scala | 23 +- .../org/apache/spark/util/AppendOnlyMap.scala | 230 ++++++++++ .../org/apache/spark/util/MetadataCleaner.scala | 36 +- .../scala/org/apache/spark/util/Utils.scala | 39 +- .../org/apache/spark/CacheManagerSuite.scala | 94 +++++ .../org/apache/spark/CheckpointSuite.scala | 10 +- .../org/apache/spark/DistributedSuite.scala | 29 +- .../scala/org/apache/spark/JavaAPISuite.java | 2 +- .../org/apache/spark/JobCancellationSuite.scala | 177 ++++++++ .../org/apache/spark/LocalSparkContext.scala | 10 +- .../org/apache/spark/SharedSparkContext.scala | 6 +- .../scala/org/apache/spark/ThreadingSuite.scala | 45 +- .../apache/spark/deploy/JsonProtocolSuite.scala | 93 ++++ .../spark/metrics/MetricsConfigSuite.scala | 19 +- .../apache/spark/rdd/AsyncRDDActionsSuite.scala | 176 ++++++++ .../spark/rdd/PairRDDFunctionsSuite.scala | 2 +- .../scala/org/apache/spark/rdd/RDDSuite.scala | 53 ++- .../spark/scheduler/DAGSchedulerSuite.scala | 31 +- .../spark/scheduler/SparkListenerSuite.scala | 30 +- .../cluster/ClusterSchedulerSuite.scala | 23 +- .../cluster/ClusterTaskSetManagerSuite.scala | 58 ++- .../spark/scheduler/cluster/FakeTask.scala | 5 +- .../cluster/TaskResultGetterSuite.scala | 114 +++++ .../scheduler/local/LocalSchedulerSuite.scala | 34 +- .../spark/serializer/KryoSerializerSuite.scala | 21 + .../org/apache/spark/storage/BlockIdSuite.scala | 114 +++++ .../spark/storage/BlockManagerSuite.scala | 102 ++--- .../scala/org/apache/spark/ui/UISuite.scala | 9 +- .../apache/spark/util/AppendOnlyMapSuite.scala | 154 +++++++ .../org/apache/spark/util/UtilsSuite.scala | 11 + docker/README.md | 5 + docker/build | 22 + docker/spark-test/README.md | 10 + docker/spark-test/base/Dockerfile | 38 ++ docker/spark-test/build | 22 + docker/spark-test/master/Dockerfile | 21 + docker/spark-test/master/default_cmd | 22 + docker/spark-test/worker/Dockerfile | 22 + docker/spark-test/worker/default_cmd | 22 + docs/_config.yml | 6 +- docs/_layouts/global.html | 13 +- docs/building-with-maven.md | 6 +- docs/cluster-overview.md | 117 ++++++ docs/configuration.md | 34 +- docs/contributing-to-spark.md | 24 +- docs/ec2-scripts.md | 8 +- docs/hadoop-third-party-distributions.md | 118 ++++++ docs/hardware-provisioning.md | 4 +- docs/img/cluster-overview.png | Bin 0 -> 28011 bytes docs/img/cluster-overview.pptx | Bin 0 -> 51771 bytes docs/img/spark-logo-hd.png | Bin 0 -> 13512 bytes docs/index.md | 26 +- docs/job-scheduling.md | 168 ++++++++ docs/mllib-guide.md | 208 ++++++++- docs/monitoring.md | 70 ++++ docs/python-programming-guide.md | 14 +- docs/quick-start.md | 70 ++-- docs/running-on-mesos.md | 48 +-- docs/running-on-yarn.md | 45 +- docs/scala-programming-guide.md | 4 +- docs/spark-standalone.md | 121 +++++- docs/streaming-programming-guide.md | 5 +- docs/tuning.md | 2 +- ec2/README | 2 +- .../root/spark-ec2/ec2-variables.sh | 17 + ec2/spark_ec2.py | 121 ++++-- examples/pom.xml | 46 +- .../org/apache/spark/examples/SparkKMeans.scala | 2 - make-distribution.sh | 2 +- mllib/data/sample_svm_data.txt | 322 ++++++++++++++ mllib/pom.xml | 10 +- .../apache/spark/mllib/classification/SVM.scala | 13 +- .../apache/spark/mllib/recommendation/ALS.scala | 199 +++++++-- .../mllib/regression/LinearRegression.scala | 4 +- .../mllib/recommendation/JavaALSSuite.java | 85 +++- .../spark/mllib/recommendation/ALSSuite.scala | 75 +++- pom.xml | 282 +++++++------ project/SparkBuild.scala | 36 +- project/project/SparkPluginBuild.scala | 19 +- python/pyspark/__init__.py | 5 +- python/pyspark/context.py | 11 + python/pyspark/rdd.py | 89 +++- python/pyspark/serializers.py | 4 + python/pyspark/shell.py | 3 +- python/pyspark/storagelevel.py | 43 ++ repl-bin/pom.xml | 22 +- repl/pom.xml | 34 +- .../org/apache/spark/repl/SparkILoop.scala | 2 +- run-example | 8 +- spark-class | 12 +- streaming/pom.xml | 11 +- .../spark/streaming/NetworkInputTracker.scala | 11 +- .../streaming/dstream/NetworkInputDStream.scala | 14 +- .../streaming/dstream/RawInputDStream.scala | 4 +- .../streaming/receivers/ActorReceiver.scala | 4 +- tools/pom.xml | 10 +- yarn/pom.xml | 72 ++-- .../spark/deploy/yarn/ApplicationMaster.scala | 131 ++++-- .../yarn/ApplicationMasterArguments.scala | 2 +- .../org/apache/spark/deploy/yarn/Client.scala | 172 ++++++-- .../spark/deploy/yarn/ClientArguments.scala | 31 +- .../spark/deploy/yarn/WorkerRunnable.scala | 66 ++- .../deploy/yarn/YarnAllocationHandler.scala | 13 +- .../cluster/YarnClusterScheduler.scala | 3 + 260 files changed, 9640 insertions(+), 3445 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/051cd960/project/SparkBuild.scala ---------------------------------------------------------------------- diff --cc project/SparkBuild.scala index bc2cec0,973f1e2..d7e8207 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@@ -75,14 -77,13 +79,16 @@@ object SparkBuild extends Build lazy val allProjects = Seq[ProjectReference]( core, repl, examples, bagel, streaming, mllib, tools, assemblyProj) ++ maybeYarnRef + // Everything except assembly and examples belongs to packageProjects + lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib) ++ maybeYarnRef + def sharedSettings = Defaults.defaultSettings ++ Seq( organization := "org.apache.spark", - version := "0.8.0-SNAPSHOT", + version := "0.9.0-incubating-SNAPSHOT", scalaVersion := "2.9.3", - scalacOptions := Seq("-unchecked", "-optimize", "-deprecation"), + scalacOptions := Seq("-Xmax-classfile-name", "120", "-unchecked", "-deprecation", + "-target:" + SCALAC_JVM_VERSION), + javacOptions := Seq("-target", JAVAC_JVM_VERSION, "-source", JAVAC_JVM_VERSION), unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath }, retrieveManaged := true, retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]",