GitHub user marmbrus commented on a diff in the pull request:

    https://github.com/apache/spark/pull/772#discussion_r14537586
  
    --- Diff: project/SparkBuild.scala ---
    @@ -15,393 +15,194 @@
      * limitations under the License.
      */
     
    +import scala.util.Properties
    +import scala.collection.JavaConversions._
    +
     import sbt._
    -import sbt.Classpaths.publishTask
     import sbt.Keys._
    -import sbtassembly.Plugin._
    -import AssemblyKeys._
    -import scala.util.Properties
     import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings}
    -import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact
    -import sbtunidoc.Plugin._
    -import UnidocKeys._
    +import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys}
    +import net.virtualvoid.sbt.graph.Plugin.graphSettings
     
    -import scala.collection.JavaConversions._
    +object BuildCommons {
     
    -// For Sonatype publishing
    -// import com.jsuereth.pgp.sbtplugin.PgpKeys._
    +  private val buildLocation = file(".").getAbsoluteFile.getParentFile
     
    -object SparkBuild extends Build {
    -  val SPARK_VERSION = "1.1.0-SNAPSHOT"
    -  val SPARK_VERSION_SHORT = SPARK_VERSION.replaceAll("-SNAPSHOT", "")
    +  val allProjects@Seq(bagel, catalyst, core, graphx, hive, mllib, repl, spark, sql, streaming,
    +  streamingFlume, streamingKafka, streamingMqtt, streamingTwitter, streamingZeromq) =
    +    Seq("bagel", "catalyst", "core", "graphx", "hive", "mllib", "repl", "spark", "sql",
    +      "streaming", "streaming-flume", "streaming-kafka", "streaming-mqtt", "streaming-twitter",
    +      "streaming-zeromq").map(ProjectRef(buildLocation, _))
     
    -  // Hadoop version to build against. For example, "1.0.4" for Apache releases, or
    -  // "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set
    -  // through the environment variables SPARK_HADOOP_VERSION and SPARK_YARN.
    -  val DEFAULT_HADOOP_VERSION = "1.0.4"
    +  val optionallyEnabledProjects@Seq(yarn, yarnStable, yarnAlpha, java8Tests, sparkGangliaLgpl) =
    +    Seq("yarn", "yarn-stable", "yarn-alpha", "java8-tests", "ganglia-lgpl").map(ProjectRef(buildLocation, _))
     
    -  // Whether the Hadoop version to build against is 2.2.x, or a variant of it. This can be set
    -  // through the SPARK_IS_NEW_HADOOP environment variable.
    -  val DEFAULT_IS_NEW_HADOOP = false
    +  val assemblyProjects@Seq(assembly, examples, tools) = Seq("assembly", "examples", "tools")
    +    .map(ProjectRef(buildLocation, _))
     
    -  val DEFAULT_YARN = false
    +  val sparkHome = buildLocation
    +}
     
    -  val DEFAULT_HIVE = false
    +object SparkBuild extends PomBuild {
     
    -  // HBase version; set as appropriate.
    -  val HBASE_VERSION = "0.94.6"
    +  import BuildCommons._
    +  import scala.collection.mutable.Map
     
    -  // Target JVM version
    -  val SCALAC_JVM_VERSION = "jvm-1.6"
    -  val JAVAC_JVM_VERSION = "1.6"
    +  val projectsMap: Map[String, Seq[Setting[_]]] = Map.empty
     
    -  lazy val root = Project("root", file("."), settings = rootSettings) aggregate(allProjects: _*)
    +  def backwardCompatibility = {
    +    import scala.collection.mutable
    +    var profiles: mutable.Seq[String] = mutable.Seq.empty
    +    if (Properties.envOrNone("SPARK_YARN").isDefined) profiles ++= 
Seq("yarn")
    +    if (Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined) profiles ++= 
Seq("spark-ganglia-lgpl")
    +    if (Properties.envOrNone("SPARK_HIVE").isDefined) profiles ++= 
Seq("hive")
    +    Properties.envOrNone("SPARK_HADOOP_VERSION") match {
    +      case Some(v) => System.setProperty("hadoop.version", v)
    +      case None =>
    +    }
    +    profiles
    +  }
     
    -  lazy val core = Project("core", file("core"), settings = coreSettings)
    +  override val profiles = Properties.envOrNone("MAVEN_PROFILES") match {
    +    case None => backwardCompatibility
    +    // Rationale: If -P option exists no need to support backwardCompatibility.
    +    case Some(v) => v.split("(\\s+|,)").filterNot(_.isEmpty).map(_.trim.replaceAll("-P", "")).toSeq
    +  }
    +
    +  override val userPropertiesMap = System.getProperties.toMap
    +
    +  lazy val sharedSettings = graphSettings ++ ScalaStyleSettings ++ Seq (
    +    javaHome   := Properties.envOrNone("JAVA_HOME").map(file),
    +    incOptions := incOptions.value.withNameHashing(true),
    +    retrieveManaged := true,
    +    retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]",
    +    publishMavenStyle := true
    +  )
     
       /** Following project only exists to pull previous artifacts of Spark for generating
         Mima ignores. For more information see: SPARK 2071 */
       lazy val oldDeps = Project("oldDeps", file("dev"), settings = oldDepsSettings)
     
    -  def replDependencies = Seq[ProjectReference](core, graphx, bagel, mllib, sql) ++ maybeHiveRef
    -
    -  lazy val repl = Project("repl", file("repl"), settings = replSettings)
    -    .dependsOn(replDependencies.map(a => a: sbt.ClasspathDep[sbt.ProjectReference]): _*)
    -
    -  lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming)
    -
    -  lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn(core)
    -
    -  lazy val graphx = Project("graphx", file("graphx"), settings = graphxSettings) dependsOn(core)
    -
    -  lazy val catalyst = Project("catalyst", file("sql/catalyst"), settings = catalystSettings) dependsOn(core)
    -
    -  lazy val sql = Project("sql", file("sql/core"), settings = sqlCoreSettings) dependsOn(core) dependsOn(catalyst % "compile->compile;test->test")
    -
    -  lazy val hive = Project("hive", file("sql/hive"), settings = hiveSettings) dependsOn(sql)
    -
    -  lazy val maybeHive: Seq[ClasspathDependency] = if (isHiveEnabled) Seq(hive) else Seq()
    -  lazy val maybeHiveRef: Seq[ProjectReference] = if (isHiveEnabled) Seq(hive) else Seq()
    -
    -  lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn(core)
    -
    -  lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn(core)
    -
    -  lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings)
    -    .dependsOn(core, graphx, bagel, mllib, streaming, repl, sql) dependsOn(maybeYarn: _*) dependsOn(maybeHive: _*) dependsOn(maybeGanglia: _*)
    -
    -  lazy val assembleDepsTask = TaskKey[Unit]("assemble-deps")
    -  lazy val assembleDeps = assembleDepsTask := {
    -    println()
    -    println("**** NOTE ****")
    -    println("'sbt/sbt assemble-deps' is no longer supported.")
    -    println("Instead create a normal assembly and:")
    -    println("  export SPARK_PREPEND_CLASSES=1 (toggle on)")
    -    println("  unset SPARK_PREPEND_CLASSES (toggle off)")
    -    println()
    -  }
    -
    -  // A configuration to set an alternative publishLocalConfiguration
    -  lazy val MavenCompile = config("m2r") extend(Compile)
    -  lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy")
    -  val sparkHome = System.getProperty("user.dir")
    -
    -  // Allows build configuration to be set through environment variables
    -  lazy val hadoopVersion = Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION)
    -  lazy val isNewHadoop = Properties.envOrNone("SPARK_IS_NEW_HADOOP") match {
    -    case None => {
    -      val isNewHadoopVersion = "^2\\.[2-9]+".r.findFirstIn(hadoopVersion).isDefined
    -      (isNewHadoopVersion|| DEFAULT_IS_NEW_HADOOP)
    -    }
    -    case Some(v) => v.toBoolean
    +  def versionArtifact(id: String): Option[sbt.ModuleID] = {
    +    val fullId = id + "_2.10"
    +    Some("org.apache.spark" % fullId % "1.0.0")
       }
     
    -  lazy val isYarnEnabled = Properties.envOrNone("SPARK_YARN") match {
    -    case None => DEFAULT_YARN
    -    case Some(v) => v.toBoolean
    -  }
    -  lazy val hadoopClient = if (hadoopVersion.startsWith("0.20.") || hadoopVersion == "1.0.0") "hadoop-core" else "hadoop-client"
    -  val maybeAvro = if (hadoopVersion.startsWith("0.23.")) Seq("org.apache.avro" % "avro" % "1.7.4") else Seq()
    +  def oldDepsSettings() = Defaults.defaultSettings ++ Seq(
    +    name := "old-deps",
    +    scalaVersion := "2.10.4",
    +    retrieveManaged := true,
    +    retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]",
    +    libraryDependencies := Seq("spark-streaming-mqtt", "spark-streaming-zeromq",
    +      "spark-streaming-flume", "spark-streaming-kafka", "spark-streaming-twitter",
    +      "spark-streaming", "spark-mllib", "spark-bagel", "spark-graphx",
    +      "spark-core").map(versionArtifact(_).get intransitive())
    +  )
     
    -  lazy val isHiveEnabled = Properties.envOrNone("SPARK_HIVE") match {
    -    case None => DEFAULT_HIVE
    -    case Some(v) => v.toBoolean
    +  def enable(settings: Seq[Setting[_]])(projectRef: ProjectRef) = {
    +    val existingSettings = projectsMap.getOrElse(projectRef.project, Seq[Setting[_]]())
    +    projectsMap += (projectRef.project -> (existingSettings ++ settings))
       }
     
    -  // Include Ganglia integration if the user has enabled Ganglia
    -  // This is isolated from the normal build due to LGPL-licensed code in the library
    -  lazy val isGangliaEnabled = Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined
    -  lazy val gangliaProj = Project("spark-ganglia-lgpl", file("extras/spark-ganglia-lgpl"), settings = gangliaSettings).dependsOn(core)
    -  val maybeGanglia: Seq[ClasspathDependency] = if (isGangliaEnabled) Seq(gangliaProj) else Seq()
    -  val maybeGangliaRef: Seq[ProjectReference] = if (isGangliaEnabled) Seq(gangliaProj) else Seq()
    -
    -  // Include the Java 8 project if the JVM version is 8+
    -  lazy val javaVersion = System.getProperty("java.specification.version")
    -  lazy val isJava8Enabled = javaVersion.toDouble >= "1.8".toDouble
    -  val maybeJava8Tests = if (isJava8Enabled) Seq[ProjectReference](java8Tests) else Seq[ProjectReference]()
    -  lazy val java8Tests = Project("java8-tests", file("extras/java8-tests"), settings = java8TestsSettings).
    -    dependsOn(core) dependsOn(streaming % "compile->compile;test->test")
    -
    -  // Include the YARN project if the user has enabled YARN
    -  lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core)
    -  lazy val yarn = Project("yarn", file("yarn/stable"), settings = yarnSettings) dependsOn(core)
    +  // Note ordering of these settings matter.
    +  /* Enable shared settings on all projects */
    +  allProjects ++ optionallyEnabledProjects ++ assemblyProjects foreach enable(sharedSettings)
     
    -  lazy val maybeYarn: Seq[ClasspathDependency] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq()
    -  lazy val maybeYarnRef: Seq[ProjectReference] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq()
    +  /* Enable tests settings for all projects except examples, assembly and tools */
    +  allProjects ++ optionallyEnabledProjects foreach enable(TestSettings.settings)
     
    -  lazy val externalTwitter = Project("external-twitter", file("external/twitter"), settings = twitterSettings)
    -    .dependsOn(streaming % "compile->compile;test->test")
    +  /* Enable Mima for all projects except spark, sql, hive, catalyst  and repl */
    +  allProjects.filterNot(y => Seq(spark, sql, hive, catalyst, repl).exists(x => x == y)).
    --- End diff ---
    
    How about leaving catalyst out, since it is not supposed to be public at all and I plan to do significant refactoring there. I think for SQL it is a good idea. We can always turn it off if it becomes onerous.
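    
    Roughly what I have in mind, reusing the `enable` helper and project refs from this diff (the `MimaBuild.mimaSettings(sparkHome)` call is my assumption about how the Mima settings get wired in here, so treat the exact names as illustrative):
    
        /* Keep catalyst (private API, about to be refactored) out of the Mima
           checks, but include sql so its public API does get checked. */
        allProjects.filterNot(x => Seq(spark, hive, catalyst, repl).contains(x))
          .foreach(enable(MimaBuild.mimaSettings(sparkHome)))
    
    i.e. the same line as in the diff, just with `sql` dropped from the exclusion list.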

