roadan closed pull request #21: AMATERASU-28 Miniconda version pulling away from code
URL: https://github.com/apache/incubator-amaterasu/pull/21
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance:

diff --git a/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/PySpark/PySparkRunner.scala b/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/PySpark/PySparkRunner.scala
index 94b8056..5897e1d 100755
--- a/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/PySpark/PySparkRunner.scala
+++ b/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/PySpark/PySparkRunner.scala
@@ -175,7 +175,7 @@ object PySparkRunner {
     * Installs Anaconda and then links it with the local spark that was installed on the executor.
     */
   private def installAnacondaOnNode(): Unit = {
-    Seq("bash", "-c", "sh Miniconda2-latest-Linux-x86_64.sh -b -p $PWD/miniconda")
+    Seq("bash", "-c", "sh miniconda-install.sh -b -p $PWD/miniconda")
     Seq("bash", "-c", "$PWD/miniconda/bin/python -m conda install -y conda-build")
     Seq("bash", "-c", "ln -s $PWD/spark-2.2.1-bin-hadoop2.7/python/pyspark $PWD/miniconda/pkgs/pyspark")
   }
diff --git a/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/SparkRunnersProvider.scala b/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/SparkRunnersProvider.scala
index ff56d8c..3d33e8e 100644
--- a/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/SparkRunnersProvider.scala
+++ b/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/SparkRunnersProvider.scala
@@ -67,11 +67,6 @@ class SparkRunnersProvider extends RunnersProvider with Logging {
       jars ++= getDependencies(execData.deps)
     }

-    if (execData.pyDeps != null &&
-      execData.pyDeps.packages.nonEmpty) {
-      loadPythonDependencies(execData.pyDeps, notifier)
-    }
-
     conf = execData.configurations.get("spark")
     executorEnv = execData.configurations.get("spark_exec_env")
     val sparkAppName = s"job_${jobId}_executor_$executorId"
@@ -85,56 +80,21 @@ class SparkRunnersProvider extends RunnersProvider with Logging {
     runners.put(sparkScalaRunner.getIdentifier, sparkScalaRunner)

     // TODO: get rid of hard-coded version
-    lazy val pySparkRunner = PySparkRunner(execData.env, jobId, notifier, spark, s"${config.spark.home}/python:${config.spark.home}/python/pyspark:${config.spark.home}/python/pyspark/build:${config.spark.home}/python/pyspark/lib/py4j-0.10.4-src.zip", execData.pyDeps, config)
+    lazy val pySparkRunner =
+      PySparkRunner(
+        execData.env,
+        jobId,
+        notifier,
+        spark,
+        s"${config.spark.home}/python:${config.spark.home}/python/pyspark",
+        execData.pyDeps,
+        config)
     runners.put(pySparkRunner.getIdentifier, pySparkRunner)

     lazy val sparkSqlRunner = SparkSqlRunner(execData.env, jobId, notifier, spark)
     runners.put(sparkSqlRunner.getIdentifier, sparkSqlRunner)
   }

-  private def installAnacondaPackage(pythonPackage: PythonPackage): Unit = {
-    val channel = pythonPackage.channel.getOrElse("anaconda")
-    if (channel == "anaconda") {
-      Seq("bash", "-c", s"$$PWD/miniconda/bin/python -m conda install -y ${pythonPackage.packageId}") ! shellLoger
-    } else {
-      Seq("bash", "-c", s"$$PWD/miniconda/bin/python -m conda install -y -c $channel ${pythonPackage.packageId}") ! shellLoger
-    }
-  }
-
-  private def installAnacondaOnNode(): Unit = {
-    // TODO: get rid of hard-coded version
-    Seq("bash", "-c", "sh Miniconda2-latest-Linux-x86_64.sh -b -p $PWD/miniconda") ! shellLoger
-    Seq("bash", "-c", "$PWD/miniconda/bin/python -m conda install -y conda-build") ! shellLoger
-    Seq("bash", "-c", "ln -s $PWD/spark-2.2.1-bin-hadoop2.7/python/pyspark $PWD/miniconda/pkgs/pyspark") ! shellLoger
-  }
-
-  private def loadPythonDependencies(deps: PythonDependencies, notifier: Notifier): Unit = {
-    notifier.info("loading anaconda evn")
-    installAnacondaOnNode()
-    val codegenPackage = PythonPackage("codegen", channel = Option("auto"))
-    installAnacondaPackage(codegenPackage)
-    try {
-      // notifier.info("loadPythonDependencies #5")
-      deps.packages.foreach(pack => {
-        pack.index.getOrElse("anaconda").toLowerCase match {
-          case "anaconda" => installAnacondaPackage(pack)
-          // case "pypi" => installPyPiPackage(pack)
-        }
-      })
-    }
-    catch {
-
-      case rte: RuntimeException =>
-        val sw = new StringWriter
-        rte.printStackTrace(new PrintWriter(sw))
-        notifier.error("", s"Failed to activate environment (runtime) - cause: ${rte.getCause}, message: ${rte.getMessage}, Stack: \n${sw.toString}")
-      case e: Exception =>
-        val sw = new StringWriter
-        e.printStackTrace(new PrintWriter(sw))
-        notifier.error("", s"Failed to activate environment (other) - type: ${e.getClass.getName}, cause: ${e.getCause}, message: ${e.getMessage}, Stack: \n${sw.toString}")
-    }
-  }
-
   override def getGroupIdentifier: String = "spark"
   override def getRunner(id: String): AmaterasuRunner = runners(id)
diff --git a/leader/src/main/resources/log4j.properties b/leader/src/main/resources/log4j.properties
deleted file mode 100644
index 4746f39..0000000
--- a/leader/src/main/resources/log4j.properties
+++ /dev/null
@@ -1,8 +0,0 @@
-# Root logger option
-log4j.rootLogger=DEBUG, stdout, file
-
-# Redirect log messages to console
-log4j.appender.stdout=org.apache.log4j.ConsoleAppender
-log4j.appender.stdout.Target=System.out
-log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
-log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
diff --git a/leader/src/main/scala/org/apache/amaterasu/leader/frameworks/spark/SparkSetupProvider.scala b/leader/src/main/scala/org/apache/amaterasu/leader/frameworks/spark/SparkSetupProvider.scala
index 8c487c1..05d0af6 100644
--- a/leader/src/main/scala/org/apache/amaterasu/leader/frameworks/spark/SparkSetupProvider.scala
+++ b/leader/src/main/scala/org/apache/amaterasu/leader/frameworks/spark/SparkSetupProvider.scala
@@ -82,8 +82,8 @@ class SparkSetupProvider extends FrameworkSetupProvider {
     var mem: Int = 0
     if (sparkExecConfigurations.get("spark.yarn.am.memory").isDefined) {
       mem = MemoryFormatParser.extractMegabytes(sparkExecConfigurations("spark.yarn.am.memory").toString)
-    } else if (sparkExecConfigurations.get("spark.driver.memeory").isDefined) {
-      mem = MemoryFormatParser.extractMegabytes(sparkExecConfigurations("spark.driver.memeory").toString)
+    } else if (sparkExecConfigurations.get("spark.driver.memory").isDefined) {
+      mem = MemoryFormatParser.extractMegabytes(sparkExecConfigurations("spark.driver.memory").toString)
     } else if (conf.spark.opts.contains("yarn.am.memory")) {
       mem = MemoryFormatParser.extractMegabytes(conf.spark.opts("yarn.am.memory"))
     } else if (conf.spark.opts.contains("driver.memory")) {
diff --git a/leader/src/main/scala/org/apache/amaterasu/leader/yarn/ApplicationMaster.scala b/leader/src/main/scala/org/apache/amaterasu/leader/yarn/ApplicationMaster.scala
index 1828100..23ae102 100644
--- a/leader/src/main/scala/org/apache/amaterasu/leader/yarn/ApplicationMaster.scala
+++ b/leader/src/main/scala/org/apache/amaterasu/leader/yarn/ApplicationMaster.scala
@@ -236,7 +236,7 @@ class ApplicationMaster extends AMRMClientAsync.CallbackHandler with Logging {
     log.info(s"${containers.size()} Containers allocated")
     for (container <- containers.asScala) { // Launch container by create ContainerLaunchContext
       if (actionsBuffer.isEmpty) {
-        log.warn(s"Why actionBuffer empty and i was called?. Container ids: ${containers.map(c => c.getId.getContainerId)}")
+        log.warn(s"Why actionBuffer empty and I was called?. Container ids: ${containers.map(c => c.getId.getContainerId)}")
         return
       }
@@ -248,7 +248,7 @@ class ApplicationMaster extends AMRMClientAsync.CallbackHandler with Logging {
       val ctx = Records.newRecord(classOf[ContainerLaunchContext])

       val commands: List[String] = List(
-        "/bin/bash ./miniconda.sh -b -p $PWD/miniconda && ",
+        "/bin/bash ./miniconda-install.sh -b -p $PWD/miniconda && ",
         s"/bin/bash spark/bin/load-spark-env.sh && ",
         s"java -cp spark/jars/*:executor.jar:spark/conf/:${config.YARN.hadoopHomeDir}/conf/ " +
         "-Xmx1G " +
@@ -270,7 +270,7 @@ class ApplicationMaster extends AMRMClientAsync.CallbackHandler with Logging {
         "executor.jar" -> executorJar,
         "amaterasu.properties" -> propFile,
         // TODO: Nadav/Eyal all of these should move to the executor resource setup
-        "miniconda.sh" -> setLocalResourceFromPath(Path.mergePaths(jarPath, new Path("/dist/Miniconda2-latest-Linux-x86_64.sh"))),
+        "miniconda-install.sh" -> setLocalResourceFromPath(Path.mergePaths(jarPath, new Path("/dist/miniconda-install.sh"))),
         "codegen.py" -> setLocalResourceFromPath(Path.mergePaths(jarPath, new Path("/dist/codegen.py"))),
         "runtime.py" -> setLocalResourceFromPath(Path.mergePaths(jarPath, new Path("/dist/runtime.py"))),
         "spark-version-info.properties" -> setLocalResourceFromPath(Path.mergePaths(jarPath, new Path("/dist/spark-version-info.properties"))),
diff --git a/leader/src/main/scripts/ama-start-mesos.sh b/leader/src/main/scripts/ama-start-mesos.sh
index 18dbed9..8e5b5de 100755
--- a/leader/src/main/scripts/ama-start-mesos.sh
+++ b/leader/src/main/scripts/ama-start-mesos.sh
@@ -126,9 +126,9 @@ if ! ls ${BASEDIR}/dist/spark*.tgz 1> /dev/null 2>&1; then
     #wget https://d3kbcqa49mib13.cloudfront.net/spark-2.2.1-bin-hadoop2.7.tgz -P ${BASEDIR}/dist
     wget http://apache.mirror.digitalpacific.com.au/spark/spark-2.2.1/spark-2.2.1-bin-hadoop2.7.tgz -P ${BASEDIR}/dist
 fi
-if [ ! -f ${BASEDIR}/dist/Miniconda2-latest-Linux-x86_64.sh ]; then
+if [ ! -f ${BASEDIR}/dist/miniconda-install.sh ]; then
     echo "${bold}Fetching miniconda distributable ${NC}"
-    wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -P ${BASEDIR}/dist
+    wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda-install.sh -P ${BASEDIR}/dist
 fi
 cp ${BASEDIR}/amaterasu.properties ${BASEDIR}/dist
 eval $CMD | grep "===>"
diff --git a/leader/src/main/scripts/ama-start-yarn.sh b/leader/src/main/scripts/ama-start-yarn.sh
index c0f8d52..d298613 100755
--- a/leader/src/main/scripts/ama-start-yarn.sh
+++ b/leader/src/main/scripts/ama-start-yarn.sh
@@ -136,9 +136,9 @@ fi

 echo $CMD

-if [ ! -f ${BASEDIR}/dist/Miniconda2-latest-Linux-x86_64.sh ]; then
+if [ ! -f ${BASEDIR}/dist/miniconda-install.sh ]; then
     echo "${bold}Fetching miniconda distributable ${NC}"
-    wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -P ${BASEDIR}/dist
+    wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda-install.sh -P ${BASEDIR}/dist
 fi
diff --git a/local/amaterasu.properties b/local/amaterasu.properties
new file mode 100755
index 0000000..5c178c9
--- /dev/null
+++ b/local/amaterasu.properties
@@ -0,0 +1,17 @@
+zk=127.0.0.1
+version=0.2.0-incubating-rc2
+master=192.168.1.70
+user=amaterasu
+mode=yarn
+webserver.port=8000
+webserver.root=dist
+spark.version=2.2.1-bin-hadoop2.7
+yarn.queue=default
+yarn.jarspath=hdfs:///apps/amaterasu
+spark.home=/usr/hdp/current/spark2-client
+#spark.home=/opt/cloudera/parcels/SPARK2-2.1.0.cloudera2-1.cdh5.7.0.p0.171658/lib/spark2
+yarn.hadoop.home.dir=/etc/hadoop
+#spark.home=/Users/arun/apps/spark-2.2.1-bin-hadoop2.7
+#yarn.hadoop.home.dir=/Users/arun/apps/hadoop-2.7.3
+spark.opts.spark.yarn.am.extraJavaOptions="-Dhdp.version=2.6.4.0-91"
+spark.opts.spark.driver.extraJavaOptions="-Dhdp.version=2.6.4.0-91"
\ No newline at end of file

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at: us...@infra.apache.org

With regards,
Apache Git Services
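
The change that gives the PR its title is decoupling the Miniconda version from the code: the upstream installer keeps its versioned name, while everything in the repository now refers to the fixed local name miniconda-install.sh. The following is a minimal, hedged sketch of that intent only, not the project's actual script logic; it assumes wget is on the PATH and reuses the URL from the ama-start-*.sh scripts above.

import java.io.File
import scala.sys.process._

object FetchMinicondaSketch {

  // URL taken from the ama-start-*.sh scripts in the diff above.
  val upstreamUrl = "https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh"

  // Downloads the versioned upstream installer into distDir under the stable local
  // name, skipping the download if it is already present (assumes wget on the PATH).
  def fetchInstaller(distDir: String): Int = {
    val target = new File(distDir, "miniconda-install.sh")
    if (target.exists()) 0
    else Seq("wget", upstreamUrl, "-O", target.getAbsolutePath).!
  }
}

With the two names separated this way, a future Miniconda version bump only touches the download step, while PySparkRunner, the YARN ApplicationMaster and the start scripts keep referring to the same file name.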
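
The SparkSetupProvider hunk corrects the misspelled configuration key spark.driver.memeory. The sketch below is a self-contained stand-in for the fallback chain the fix affects; the simplified extractMegabytes and the 1024 MB default are assumptions for illustration, not the project's MemoryFormatParser or its defaults.

object MemoryResolutionSketch {

  // Simplified stand-in for MemoryFormatParser.extractMegabytes: accepts values
  // such as "2g", "512m" or a plain number of megabytes.
  def extractMegabytes(value: String): Int = {
    val v = value.trim.toLowerCase
    if (v.endsWith("g")) v.dropRight(1).toInt * 1024
    else if (v.endsWith("m")) v.dropRight(1).toInt
    else v.toInt
  }

  // Mirrors the if/else-if chain in SparkSetupProvider. With the old misspelled key,
  // a user-supplied spark.driver.memory value never matched this branch and resolution
  // silently fell through to the spark.opts branches or the default.
  def resolveAmMemory(sparkExecConfigurations: Map[String, Any],
                      sparkOpts: Map[String, String],
                      defaultMb: Int = 1024): Int = {
    if (sparkExecConfigurations.contains("spark.yarn.am.memory"))
      extractMegabytes(sparkExecConfigurations("spark.yarn.am.memory").toString)
    else if (sparkExecConfigurations.contains("spark.driver.memory")) // the corrected key
      extractMegabytes(sparkExecConfigurations("spark.driver.memory").toString)
    else if (sparkOpts.contains("yarn.am.memory"))
      extractMegabytes(sparkOpts("yarn.am.memory"))
    else if (sparkOpts.contains("driver.memory"))
      extractMegabytes(sparkOpts("driver.memory"))
    else
      defaultMb
  }

  def main(args: Array[String]): Unit = {
    // "2g" resolves to 2048 MB once the corrected key is looked up.
    println(resolveAmMemory(Map("spark.driver.memory" -> "2g"), Map.empty))
  }
}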