Repository: incubator-predictionio Updated Branches: refs/heads/feature/xbuild [created] a8290dce6
[PIO-30] Set up a cross build for Spark 2.0 and Scala 2.11 Closes #345 Project: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/commit/a8290dce Tree: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/tree/a8290dce Diff: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/diff/a8290dce Branch: refs/heads/feature/xbuild Commit: a8290dce6488f980f9c956d0caa755cfdeb0779d Parents: ae51040 Author: Chan Lee <[email protected]> Authored: Tue Mar 14 15:45:42 2017 -0700 Committer: Donald Szeto <[email protected]> Committed: Tue Mar 14 15:45:42 2017 -0700 ---------------------------------------------------------------------- .travis.yml | 35 +++++-- bin/install.sh | 2 +- build.sbt | 98 ++++++++++++++++++-- common/build.sbt | 7 +- conf/pio-env.sh.template | 1 + conf/pio-env.sh.travis | 5 +- conf/pio-vendors.sh | 31 +++++++ conf/set_build_profile.sh | 31 +++++++ core/build.sbt | 6 +- .../predictionio/workflow/CreateServer.scala | 3 +- data/build.sbt | 7 +- .../predictionio/data/api/EventServer.scala | 5 +- .../predictionio/data/view/DataView.scala | 13 +-- .../data/SparkVersionDependent.scala | 30 ++++++ .../data/SparkVersionDependent.scala | 30 ++++++ make-distribution.sh | 9 +- project/Build.scala | 23 +++++ project/assembly.sbt | 2 +- project/plugins.sbt | 4 +- .../data/storage/jdbc/JDBCPEvents.scala | 26 ++++-- tests/Dockerfile | 22 +++-- tests/README.md | 6 ++ tests/build-docker.sh | 50 ---------- tests/build_docker.sh | 68 ++++++++++++++ tests/docker-files/init.sh | 2 +- tests/docker-files/set_build_profile.sh | 31 +++++++ .../engines/recommendation-engine/build.sbt | 8 +- .../engines/recommendation-engine/pio.sbt | 4 + tests/pio_tests/scenarios/quickstart_test.py | 8 +- tests/pio_tests/utils.py | 5 +- tests/script.travis.sh | 8 +- tests/unit.sh | 10 +- tools/build.sbt | 12 +-- .../predictionio/tools/admin/AdminAPI.scala | 3 +- .../tools/export/EventsToFile.scala | 6 +- 35 files changed, 472 insertions(+), 139 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/.travis.yml ---------------------------------------------------------------------- diff --git a/.travis.yml b/.travis.yml index 634d286..3921f1a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,9 +29,6 @@ branches: language: scala -scala: - - 2.10.5 - jdk: - oraclejdk8 @@ -44,12 +41,31 @@ cache: false env: matrix: - - BUILD_TYPE=Unit METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL - - BUILD_TYPE=Integration METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL - - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS ES_VERSION=5 - - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS ES_VERSION=5 - - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS ES_VERSION=1 - - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS ES_VERSION=1 + - BUILD_TYPE=Unit BUILD_PROFILE=scala-2.10 + METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL + - BUILD_TYPE=Integration BUILD_PROFILE=scala-2.10 + METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL + - BUILD_TYPE=Integration BUILD_PROFILE=scala-2.10 + METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS ES_VERSION=5 + - BUILD_TYPE=Integration BUILD_PROFILE=scala-2.10 + METADATA_REP=ELASTICSEARCH 
EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS ES_VERSION=5 + - BUILD_TYPE=Integration BUILD_PROFILE=scala-2.10 + METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS ES_VERSION=1 + - BUILD_TYPE=Integration BUILD_PROFILE=scala-2.10 + METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS ES_VERSION=1 + + - BUILD_TYPE=Unit BUILD_PROFILE=scala-2.11 + METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL + - BUILD_TYPE=Integration BUILD_PROFILE=scala-2.11 + METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL + - BUILD_TYPE=Integration BUILD_PROFILE=scala-2.11 + METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS ES_VERSION=5 + - BUILD_TYPE=Integration BUILD_PROFILE=scala-2.11 + METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS ES_VERSION=5 + - BUILD_TYPE=Integration BUILD_PROFILE=scala-2.11 + METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS ES_VERSION=1 + - BUILD_TYPE=Integration BUILD_PROFILE=scala-2.11 + METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS ES_VERSION=1 before_install: - unset SBT_OPTS JVM_OPTS @@ -64,6 +80,7 @@ before_script: - ./tests/before_script.travis.sh script: + - source conf/set_build_profile.sh ${BUILD_PROFILE} - ./tests/script.travis.sh after_script: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/bin/install.sh ---------------------------------------------------------------------- diff --git a/bin/install.sh b/bin/install.sh index e485df9..7431b09 100755 --- a/bin/install.sh +++ b/bin/install.sh @@ -18,7 +18,7 @@ # OS=`uname` -SPARK_VERSION=1.6.2 +SPARK_VERSION=1.6.3 # Looks like support for Elasticsearch 2.0 will require 2.0 so deferring ELASTICSEARCH_VERSION=1.7.5 HBASE_VERSION=1.2.2 http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/build.sbt ---------------------------------------------------------------------- diff --git a/build.sbt b/build.sbt index fa5ba29..330dd06 100644 --- a/build.sbt +++ b/build.sbt @@ -16,6 +16,54 @@ */ import UnidocKeys._ +import scala.math.Ordering.Implicits._ + +lazy val profiles: Map[String, Profile] = + Map( + "scala-2.10" -> Profile( + name="scala-2.10", + scalaVersion="2.10.5", + sparkVersion="1.6.3", + hadoopVersion="2.6.4", + akkaVersion="2.3.15"), + + "scala-2.11" -> Profile( + name="scala-2.11", + scalaVersion="2.11.8", + sparkVersion="2.0.2", + hadoopVersion="2.7.3", + akkaVersion="2.4.10")) + +lazy val defaultProfile = "scala-2.10" + +buildProfile := { + val profileName = sys.props.get("build.profile").getOrElse(defaultProfile) + val profile = profiles(profileName) + val scalaVersion = profile.scalaVersion + + val sparkVersion = sys.props.get("spark.version") map { sv => + if ((versionMajor(sv), versionMinor(sv)) < (1, 6)) { + throw new IllegalArgumentException("Spark versions below 1.6 are no longer supported") + } else if (versionMajor(sv) >= 2 && + (versionMajor(scalaVersion), versionMinor(scalaVersion)) < (2, 11)) { + throw new IllegalArgumentException("Spark 2.x requires Scala 2.11 and above") + } else { + sv + } + } getOrElse(profile.sparkVersion) + + val hadoopVersion = sys.props.get("hadoop.version").getOrElse(profile.hadoopVersion) + + if (hadoopVersion != profile.hadoopVersion || sparkVersion != profile.sparkVersion) { + profile.copy( + name = profile.name + "-custom", + sparkVersion = sparkVersion, + hadoopVersion = hadoopVersion) + } else { + profile + } +} + name := "apache-predictionio-parent" @@ -23,20 +71,29 @@ version in ThisBuild := "0.11.0-SNAPSHOT" 
organization in ThisBuild := "org.apache.predictionio" -scalaVersion in ThisBuild := "2.10.5" +scalaVersion in ThisBuild := { + val version = buildProfile.value.scalaVersion + if (versionMinor(version) < 11) { + sLog.value.warn(s"Scala version ${version} is deprecated!") + } + version +} scalacOptions in ThisBuild ++= Seq("-deprecation", "-unchecked", "-feature") scalacOptions in (ThisBuild, Test) ++= Seq("-Yrangepos") - fork in (ThisBuild, run) := true javacOptions in (ThisBuild, compile) ++= Seq("-source", "1.7", "-target", "1.7", "-Xlint:deprecation", "-Xlint:unchecked") +akkaVersion in ThisBuild := buildProfile.value.akkaVersion + json4sVersion in ThisBuild := "3.2.10" -sparkVersion in ThisBuild := "1.6.3" +sparkVersion in ThisBuild := buildProfile.value.sparkVersion + +hadoopVersion in ThisBuild := buildProfile.value.hadoopVersion val pioBuildInfoSettings = buildInfoSettings ++ Seq( sourceGenerators in Compile <+= buildInfo, @@ -45,9 +102,23 @@ val pioBuildInfoSettings = buildInfoSettings ++ Seq( version, scalaVersion, sbtVersion, - sparkVersion), + sparkVersion, + hadoopVersion), buildInfoPackage := "org.apache.predictionio.core") +// Used temporarily to modify genjavadoc version to "0.10" until unidoc updates it +val genjavadocSettings: Seq[sbt.Def.Setting[_]] = Seq( + libraryDependencies += compilerPlugin("com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.10" cross CrossVersion.full), + scalacOptions <+= target map (t => "-P:genjavadoc:out=" + (t / "java"))) + +// Paths specified below are required for the tests, since thread pools initialized +// in unit tests of the data subproject are used later in Spark jobs executed in core. +// They need to have properly configured classloaders to load core classes for Spark +// in subsequent tests. +def coreClasses(baseDirectory: java.io.File, scalaVersion: String) = Seq( + baseDirectory / s"../core/target/scala-${versionPrefix(scalaVersion)}/classes", + baseDirectory / s"../core/target/scala-${versionPrefix(scalaVersion)}/test-classes") + val conf = file("conf") val commonSettings = Seq( @@ -61,7 +132,9 @@ val common = (project in file("common")). val data = (project in file("data")). dependsOn(common). settings(commonSettings: _*). - settings(genjavadocSettings: _*) + settings(genjavadocSettings: _*). + settings(unmanagedSourceDirectories in Compile += + sourceDirectory.value / s"main/spark-${versionMajor(sparkVersion.value)}") val dataElasticsearch1 = (project in file("storage/elasticsearch1")). settings(commonSettings: _*). @@ -99,11 +172,13 @@ val tools = (project in file("tools")). dependsOn(data). settings(commonSettings: _*). settings(genjavadocSettings: _*). - enablePlugins(SbtTwirl) + enablePlugins(SbtTwirl). + settings(fullClasspath in Test ++= coreClasses(baseDirectory.value, scalaVersion.value)) val e2 = (project in file("e2")). settings(commonSettings: _*). - settings(genjavadocSettings: _*) + settings(genjavadocSettings: _*). + settings(fullClasspath in Test ++= coreClasses(baseDirectory.value, scalaVersion.value)) val root = (project in file(".")). settings(commonSettings: _*).
@@ -232,3 +307,12 @@ parallelExecution := false parallelExecution in Global := false testOptions in Test += Tests.Argument("-oDF") + +printProfile := { + val profile = buildProfile.value + println(s"PIO_PROFILE_VERSION=${profile.name}") + println(s"PIO_SCALA_VERSION=${profile.scalaVersion}") + println(s"PIO_SPARK_VERSION=${profile.sparkVersion}") + println(s"PIO_HADOOP_VERSION=${profile.hadoopVersion}") + println(s"PIO_AKKA_VERSION=${profile.akkaVersion}") +} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/common/build.sbt ---------------------------------------------------------------------- diff --git a/common/build.sbt b/common/build.sbt index e7050d4..47fba3b 100644 --- a/common/build.sbt +++ b/common/build.sbt @@ -18,9 +18,10 @@ name := "apache-predictionio-common" libraryDependencies ++= Seq( - "io.spray" %% "spray-can" % "1.3.2", - "io.spray" %% "spray-routing" % "1.3.2", - "org.spark-project.akka" %% "akka-actor" % "2.3.4-spark" + "io.spray" %% "spray-can" % "1.3.3", + "io.spray" %% "spray-routing" % "1.3.3", + "com.typesafe.akka" %% "akka-actor" % akkaVersion.value, + "com.typesafe.akka" %% "akka-slf4j" % akkaVersion.value ) pomExtra := childrenPomExtra.value http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/conf/pio-env.sh.template ---------------------------------------------------------------------- diff --git a/conf/pio-env.sh.template b/conf/pio-env.sh.template index 0d76102..57185ba 100644 --- a/conf/pio-env.sh.template +++ b/conf/pio-env.sh.template @@ -24,6 +24,7 @@ # you need to change these to fit your site. # SPARK_HOME: Apache Spark is a hard dependency and must be configured. +# SPARK_HOME=$PIO_HOME/vendors/spark-2.0.2-bin-hadoop2.7 SPARK_HOME=$PIO_HOME/vendors/spark-1.6.3-bin-hadoop2.6 POSTGRES_JDBC_DRIVER=$PIO_HOME/lib/postgresql-9.4-1204.jdbc41.jar http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/conf/pio-env.sh.travis ---------------------------------------------------------------------- diff --git a/conf/pio-env.sh.travis b/conf/pio-env.sh.travis index 94b30cd..80e3332 100644 --- a/conf/pio-env.sh.travis +++ b/conf/pio-env.sh.travis @@ -24,7 +24,8 @@ # you need to change these to fit your site. # SPARK_HOME: Apache Spark is a hard dependency and must be configured. -SPARK_HOME=$PIO_HOME/vendors/spark-1.3.0-bin-hadoop2.4 +# it is set up in script.travis.sh +SPARK_HOME=$SPARK_HOME # Filesystem paths where PredictionIO uses as block storage. PIO_FS_BASEDIR=$HOME/.pio_store @@ -41,7 +42,7 @@ PIO_STORAGE_SOURCES_LOCALFS_TYPE=localfs PIO_STORAGE_SOURCES_LOCALFS_PATH=$PIO_FS_BASEDIR/models PIO_STORAGE_SOURCES_HBASE_TYPE=hbase -PIO_STORAGE_SOURCES_HBASE_HOME=$PIO_HOME/vendors/hbase-1.0.0 +PIO_STORAGE_SOURCES_HBASE_HOME=$HBASE_HOME # Storage Data Sources (pgsql) PIO_STORAGE_SOURCES_PGSQL_TYPE=jdbc http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/conf/pio-vendors.sh ---------------------------------------------------------------------- diff --git a/conf/pio-vendors.sh b/conf/pio-vendors.sh new file mode 100644 index 0000000..00e7783 --- /dev/null +++ b/conf/pio-vendors.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# IMPORTANT: PIO_*_VERSION for dependencies must be set before invoking this script. +# `source conf/set_build_profile.sh $BUILD_PROFILE` to get the proper versions + +PGSQL_JAR=postgresql-9.4-1204.jdbc41.jar +PGSQL_DOWNLOAD=https://jdbc.postgresql.org/download/${PGSQL_JAR} + +HADOOP_MAJOR=`echo $PIO_HADOOP_VERSION | awk -F. '{print $1 "." $2}'` +SPARK_DIR=spark-${PIO_SPARK_VERSION}-bin-hadoop${HADOOP_MAJOR} +SPARK_ARCHIVE=${SPARK_DIR}.tgz +SPARK_DOWNLOAD=http://d3kbcqa49mib13.cloudfront.net/${SPARK_ARCHIVE} +# ELASTICSEARCH_DOWNLOAD +# 5.x https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${PIO_ELASTICSEARCH_VERSION}.tar.gz +# 1.x https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-${PIO_ELASTICSEARCH_VERSION}.tar.gz http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/conf/set_build_profile.sh ---------------------------------------------------------------------- diff --git a/conf/set_build_profile.sh b/conf/set_build_profile.sh new file mode 100755 index 0000000..397e824 --- /dev/null +++ b/conf/set_build_profile.sh @@ -0,0 +1,31 @@ +#!/bin/bash - +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Sets versions of profile dependencies from the sbt configuration. +# e.g.
Run `source ./set_build_profile.sh scala-2.11` + +set -e + +if [[ "$#" -ne 1 ]]; then + echo "Usage: set_build_profile.sh <build-profile>" + exit 1 +fi + +set -a +eval `sbt/sbt --error 'set showSuccess := false' -Dbuild.profile=$1 printProfile | grep '.*_VERSION=.*'` +set +a http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/core/build.sbt ---------------------------------------------------------------------- diff --git a/core/build.sbt b/core/build.sbt index bfb8bf3..584133c 100644 --- a/core/build.sbt +++ b/core/build.sbt @@ -18,9 +18,9 @@ name := "apache-predictionio-core" libraryDependencies ++= Seq( - "com.github.scopt" %% "scopt" % "3.3.0", + "com.github.scopt" %% "scopt" % "3.5.0", "com.google.code.gson" % "gson" % "2.5", - "com.google.guava" % "guava" % "18.0", + "com.google.guava" % "guava" % "19.0", "com.twitter" %% "chill" % "0.7.2" exclude("com.esotericsoftware.minlog", "minlog"), "com.twitter" %% "chill-bijection" % "0.7.2", @@ -39,8 +39,6 @@ libraryDependencies ++= Seq( "org.slf4j" % "slf4j-log4j12" % "1.7.18", "org.specs2" %% "specs2" % "2.3.13" % "test") -//testOptions := Seq(Tests.Filter(s => Seq("Dev").exists(s.contains(_)))) - parallelExecution in Test := false pomExtra := childrenPomExtra.value http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala b/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala index 6f274bc..31b7831 100644 --- a/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala +++ b/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala @@ -51,7 +51,6 @@ import spray.routing._ import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future import scala.concurrent.duration._ -import scala.concurrent.future import scala.language.existentials import scala.util.{Failure, Random, Success} import scalaj.http.HttpOptions @@ -548,7 +547,7 @@ class ServerActor[Q, P]( "prediction" -> prediction)) ++ queryPrId // At this point args.accessKey should be Some(String).
val accessKey = args.accessKey.getOrElse("") - val f: Future[Int] = future { + val f: Future[Int] = Future { scalaj.http.Http( s"http://${args.eventServerIp}:${args.eventServerPort}/" + s"events.json?accessKey=$accessKey").postData( http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/data/build.sbt ---------------------------------------------------------------------- diff --git a/data/build.sbt b/data/build.sbt index f5e95b5..27eeb78 100644 --- a/data/build.sbt +++ b/data/build.sbt @@ -19,12 +19,13 @@ name := "apache-predictionio-data" libraryDependencies ++= Seq( "com.github.nscala-time" %% "nscala-time" % "2.6.0", + "com.google.guava" % "guava" % "19.0", "commons-codec" % "commons-codec" % "1.9", "io.spray" %% "spray-can" % "1.3.3", "io.spray" %% "spray-routing" % "1.3.3", "io.spray" %% "spray-testkit" % "1.3.3" % "test", "mysql" % "mysql-connector-java" % "5.1.37" % "optional", - "org.apache.hadoop" % "hadoop-common" % "2.6.2" + "org.apache.hadoop" % "hadoop-common" % hadoopVersion.value exclude("javax.servlet", "servlet-api"), "org.apache.zookeeper" % "zookeeper" % "3.4.7" exclude("org.slf4j", "slf4j-api") @@ -36,7 +37,9 @@ libraryDependencies ++= Seq( "org.json4s" %% "json4s-ext" % json4sVersion.value, "org.scalatest" %% "scalatest" % "2.1.7" % "test", "org.slf4j" % "slf4j-log4j12" % "1.7.18", - "org.spark-project.akka" %% "akka-actor" % "2.3.4-spark", + "org.postgresql" % "postgresql" % "9.4.1209", + "org.scalikejdbc" %% "scalikejdbc" % "2.3.2", + "com.typesafe.akka" %% "akka-actor" % akkaVersion.value, "org.specs2" %% "specs2" % "2.3.13" % "test") parallelExecution in Test := false http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala ---------------------------------------------------------------------- diff --git a/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala b/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala index 648316e..b4392ff 100644 --- a/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala +++ b/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala @@ -48,8 +48,7 @@ import spray.httpx.Json4sSupport import spray.routing._ import spray.routing.authentication.Authentication -import scala.concurrent.ExecutionContext -import scala.concurrent.Future +import scala.concurrent.{ExecutionContext, Future} import scala.util.{Try, Success, Failure} class EventServiceActor( @@ -635,7 +634,7 @@ object EventServer { } object Run { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { EventServer.createEventServer(EventServerConfig( ip = "0.0.0.0", port = 7070)) http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala ---------------------------------------------------------------------- diff --git a/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala b/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala index 4866b5d..1c47e10 100644 --- a/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala +++ b/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala @@ -20,6 +20,7 @@ package org.apache.predictionio.data.view import org.apache.predictionio.annotation.Experimental import org.apache.predictionio.data.storage.Event +import org.apache.predictionio.data.SparkVersionDependent import grizzled.slf4j.Logger import 
org.apache.predictionio.data.store.PEventStore @@ -27,7 +28,7 @@ import org.apache.predictionio.data.store.PEventStore import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame import org.apache.spark.sql.SaveMode -import org.apache.spark.sql.SQLContext +import org.apache.spark.SparkContext import org.joda.time.DateTime import scala.reflect.ClassTag @@ -64,11 +65,11 @@ object DataView { untilTime: Option[DateTime] = None, conversionFunction: Event => Option[E], name: String = "", - version: String = "")(sqlContext: SQLContext): DataFrame = { + version: String = "")(sc: SparkContext): DataFrame = { @transient lazy val logger = Logger[this.type] - val sc = sqlContext.sparkContext + val sqlSession = SparkVersionDependent.sqlSession(sc) val beginTime = startTime match { case Some(t) => t @@ -85,7 +86,7 @@ object DataView { val baseDir = s"${sys.env("PIO_FS_BASEDIR")}/view" val fileName = s"$baseDir/$name-$appName-$hash.parquet" try { - sqlContext.read.parquet(fileName) + sqlSession.read.parquet(fileName) } catch { case e: java.io.FileNotFoundException => logger.info("Cached copy not found, reading from DB.") @@ -96,11 +97,11 @@ object DataView { startTime = startTime, untilTime = Some(endTime))(sc) .flatMap((e) => conversionFunction(e)) - import sqlContext.implicits._ // needed for RDD.toDF() + import sqlSession.implicits._ // needed for RDD.toDF() val resultDF = result.toDF() resultDF.write.mode(SaveMode.ErrorIfExists).parquet(fileName) - sqlContext.read.parquet(fileName) + sqlSession.read.parquet(fileName) case e: java.lang.RuntimeException => if (e.toString.contains("is not a Parquet file")) { logger.error(s"$fileName does not contain a valid Parquet file. " + http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/data/src/main/spark-1/org/apache/predictionio/data/SparkVersionDependent.scala ---------------------------------------------------------------------- diff --git a/data/src/main/spark-1/org/apache/predictionio/data/SparkVersionDependent.scala b/data/src/main/spark-1/org/apache/predictionio/data/SparkVersionDependent.scala new file mode 100644 index 0000000..0652e0b --- /dev/null +++ b/data/src/main/spark-1/org/apache/predictionio/data/SparkVersionDependent.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package org.apache.predictionio.data + +import org.apache.spark.SparkContext +import org.apache.spark.sql.SQLContext + +object SparkVersionDependent { + + def sqlSession(sc: SparkContext): SQLContext = { + return new SQLContext(sc) + } + +} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/data/src/main/spark-2/org/apache/predictionio/data/SparkVersionDependent.scala ---------------------------------------------------------------------- diff --git a/data/src/main/spark-2/org/apache/predictionio/data/SparkVersionDependent.scala b/data/src/main/spark-2/org/apache/predictionio/data/SparkVersionDependent.scala new file mode 100644 index 0000000..3d07bdf --- /dev/null +++ b/data/src/main/spark-2/org/apache/predictionio/data/SparkVersionDependent.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package org.apache.predictionio.data + +import org.apache.spark.SparkContext +import org.apache.spark.sql.SparkSession + +object SparkVersionDependent { + + def sqlSession(sc: SparkContext): SparkSession = { + SparkSession.builder().getOrCreate() + } + +} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/make-distribution.sh ---------------------------------------------------------------------- diff --git a/make-distribution.sh b/make-distribution.sh index c360c0e..2000340 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -43,6 +43,9 @@ case $i in ES_VERSION="${i#*=}" shift ;; + -D*) + shift + ;; *) usage exit 1 @@ -61,12 +64,14 @@ fi FWDIR="$(cd `dirname $0`; pwd)" DISTDIR="${FWDIR}/dist" -VERSION=$(grep version ${FWDIR}/build.sbt | grep ThisBuild | grep -o '".*"' | sed 's/"//g') +VERSION=$(grep ^version ${FWDIR}/build.sbt | grep ThisBuild | grep -o '".*"' | sed 's/"//g') echo "Building binary distribution for PredictionIO $VERSION..." 
cd ${FWDIR} -sbt/sbt common/publishLocal data/publishLocal core/publishLocal e2/publishLocal dataElasticsearch1/assembly dataElasticsearch/assembly dataHbase/assembly dataHdfs/assembly dataJdbc/assembly dataLocalfs/assembly tools/assembly +sbt/sbt "$@" common/publishLocal data/publishLocal core/publishLocal e2/publishLocal \ +dataElasticsearch1/assembly dataElasticsearch/assembly dataHbase/assembly dataHdfs/assembly \ +dataJdbc/assembly dataLocalfs/assembly tools/assembly cd ${FWDIR} rm -rf ${DISTDIR} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/project/Build.scala ---------------------------------------------------------------------- diff --git a/project/Build.scala b/project/Build.scala index 885073a..cc37912 100644 --- a/project/Build.scala +++ b/project/Build.scala @@ -18,6 +18,13 @@ import sbt._ import Keys._ +case class Profile( + name: String, + scalaVersion: String, + sparkVersion: String, + hadoopVersion: String, + akkaVersion: String) + object PIOBuild extends Build { val elasticsearchVersion = SettingKey[String]( "elasticsearch-version", @@ -28,7 +35,23 @@ object PIOBuild extends Build { val sparkVersion = SettingKey[String]( "spark-version", "The version of Apache Spark used for building.") + val hadoopVersion = SettingKey[String]( + "hadoop-version", + "The version of Apache Hadoop used for building") + val akkaVersion = SettingKey[String]( + "akka-version", + "The version of Akka used for building") + val buildProfile = SettingKey[Profile]( + "build-profile", + "The dependency profile used for the build") val childrenPomExtra = SettingKey[scala.xml.NodeSeq]( "children-pom-extra", "Extra POM data for children projects.") + + def versionPrefix(versionString: String) = + versionString.split('.').take(2).mkString(".") + def versionMajor(versionString: String) = versionString.split('.')(0).toInt + def versionMinor(versionString: String) = versionString.split('.')(1).toInt + + lazy val printProfile = taskKey[Unit]("Print settings for the chosen profile") } http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/project/assembly.sbt ---------------------------------------------------------------------- diff --git a/project/assembly.sbt b/project/assembly.sbt index 49085ee..39c1bb8 100644 --- a/project/assembly.sbt +++ b/project/assembly.sbt @@ -1 +1 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.1") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/project/plugins.sbt ---------------------------------------------------------------------- diff --git a/project/plugins.sbt b/project/plugins.sbt index 3edaf67..2f21e00 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -2,7 +2,7 @@ addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.3.2") addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.0.0") -addSbtPlugin("com.typesafe.sbt" % "sbt-twirl" % "1.0.3") +addSbtPlugin("com.typesafe.sbt" % "sbt-twirl" % "1.1.1") addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "1.1") @@ -10,4 +10,4 @@ addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.8.0") resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/" -addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.1.0") +addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.3.5") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala 
---------------------------------------------------------------------- diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala index 2e6ee83..ff16d5d 100644 --- a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala +++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala @@ -15,16 +15,17 @@ * limitations under the License. */ - package org.apache.predictionio.data.storage.jdbc import java.sql.{DriverManager, ResultSet} import com.github.nscala_time.time.Imports._ -import org.apache.predictionio.data.storage.{DataMap, Event, PEvents, StorageClientConfig} +import org.apache.predictionio.data.storage.{ + DataMap, Event, PEvents, StorageClientConfig} +import org.apache.predictionio.data.SparkVersionDependent import org.apache.spark.SparkContext import org.apache.spark.rdd.{JdbcRDD, RDD} -import org.apache.spark.sql.{SQLContext, SaveMode} +import org.apache.spark.sql.SaveMode import org.json4s.JObject import org.json4s.native.Serialization import scalikejdbc._ @@ -32,6 +33,7 @@ import scalikejdbc._ /** JDBC implementation of [[PEvents]] */ class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String) extends PEvents { @transient private implicit lazy val formats = org.json4s.DefaultFormats + def find( appId: Int, channelId: Option[Int] = None, @@ -42,6 +44,7 @@ class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String eventNames: Option[Seq[String]] = None, targetEntityType: Option[Option[String]] = None, targetEntityId: Option[Option[String]] = None)(sc: SparkContext): RDD[Event] = { + val lower = startTime.map(_.getMillis).getOrElse(0.toLong) /** Change the default upper bound from +100 to +1 year because MySQL's * FROM_UNIXTIME(t) will return NULL if we use +100 years. 
@@ -118,13 +121,12 @@ class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String } def write(events: RDD[Event], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = { - val sqlContext = new SQLContext(sc) - - import sqlContext.implicits._ + val sqlSession = SparkVersionDependent.sqlSession(sc) + import sqlSession.implicits._ val tableName = JDBCUtils.eventTableName(namespace, appId, channelId) - val eventTableColumns = Seq[String]( + val eventsColumnNamesInDF = Seq[String]( "id" , "event" , "entityType" @@ -139,11 +141,16 @@ class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String , "creationTime" , "creationTimeZone") + // Necessary for handling postgres "case-sensitivity" + val eventsColumnNamesInSQL = JDBCUtils.driverType(client) match { + case "postgresql" => eventsColumnNamesInDF.map(_.toLowerCase) + case _ => eventsColumnNamesInDF + } val eventDF = events.map(x => Event(eventId = None, event = x.event, entityType = x.entityType, entityId = x.entityId, targetEntityType = x.targetEntityType, targetEntityId = x.targetEntityId, properties = x.properties, - eventTime = x.eventTime, tags = x.tags, prId= x.prId, + eventTime = x.eventTime, tags = x.tags, prId = x.prId, creationTime = x.eventTime) ) .map { event => @@ -160,9 +167,8 @@ class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String , event.prId , new java.sql.Timestamp(event.creationTime.getMillis) , event.creationTime.getZone.getID) - }.toDF(eventTableColumns:_*) + }.toDF(eventsColumnNamesInSQL:_*) - // spark version 1.4.0 or higher val prop = new java.util.Properties prop.setProperty("user", config.properties("USERNAME")) prop.setProperty("password", config.properties("PASSWORD")) http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tests/Dockerfile ---------------------------------------------------------------------- diff --git a/tests/Dockerfile b/tests/Dockerfile index 94f5688..a04b79c 100644 --- a/tests/Dockerfile +++ b/tests/Dockerfile @@ -17,17 +17,25 @@ FROM predictionio/pio -ENV SPARK_VERSION 1.6.3 -ENV ELASTICSEARCH_VERSION 5.2.1 -ENV HBASE_VERSION 1.0.0 +ARG SPARK_ARCHIVE +ARG SPARK_DIR +ARG PGSQL_JAR +ARG BUILD_PROFILE +ARG PIO_SCALA_VERSION +ARG PIO_SPARK_VERSION + +ENV BUILD_PROFILE=$BUILD_PROFILE +ENV PIO_SCALA_VERSION=$PIO_SCALA_VERSION +ENV PIO_SPARK_VERSION=$PIO_SPARK_VERSION -ADD docker-files/spark-${SPARK_VERSION}-bin-hadoop2.6.tgz /vendors # WORKAROUND: es-hadoop stops on RDD#take(1) -RUN echo "spark.locality.wait.node 0s" > /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6/conf/spark-defaults.conf -ENV SPARK_HOME /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6 +ADD docker-files/${SPARK_ARCHIVE} /vendors +RUN echo "spark.locality.wait.node 0s" > /vendors/${SPARK_DIR}/conf/spark-defaults.conf +ENV SPARK_HOME /vendors/${SPARK_DIR} -COPY docker-files/postgresql-9.4-1204.jdbc41.jar /drivers/postgresql-9.4-1204.jdbc41.jar +COPY docker-files/${PGSQL_JAR} /drivers/${PGSQL_JAR} COPY docker-files/init.sh init.sh +COPY docker-files/set_build_profile.sh set_build_profile.sh COPY docker-files/env-conf/hbase-site.xml ${PIO_HOME}/conf/hbase-site.xml COPY docker-files/env-conf/pio-env.sh ${PIO_HOME}/conf/pio-env.sh COPY docker-files/pgpass /root/.pgpass http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tests/README.md ---------------------------------------------------------------------- diff --git a/tests/README.md b/tests/README.md index 95b3fdf..236d168 100644 --- a/tests/README.md +++ 
b/tests/README.md @@ -34,6 +34,12 @@ To download the image run: ``` $ docker pull predictionio/pio-testing ``` +To build the image, use the script: +``` +$ tests/build_docker.sh <image_name> +``` +This is necessary to infer the proper versions of dependencies, e.g. Spark, to be included in the image. + The most convenient way to make use of it is to execute ***run_docker.sh*** script passing it the configuration, the path to PredictionIO's repository with archived snapshot and the command to run. When no command is provided it opens a bash shell inside the docker image. Example of usage: ```sh $ ./run_docker.sh ELASTICSEARCH HBASE LOCALFS \ http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tests/build-docker.sh ---------------------------------------------------------------------- diff --git a/tests/build-docker.sh b/tests/build-docker.sh deleted file mode 100755 index ed43715..0000000 --- a/tests/build-docker.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -ex - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements.  See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License.  You may obtain a copy of the License at -# -#    http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -if [ ! -f $DIR/docker-files/spark-1.6.3-bin-hadoop2.6.tgz ]; then - wget http://d3kbcqa49mib13.cloudfront.net/spark-1.6.3-bin-hadoop2.6.tgz - mv spark-1.6.3-bin-hadoop2.6.tgz $DIR/docker-files/ -fi - -if [ ! -f $DIR/docker-files/postgresql-9.4-1204.jdbc41.jar ]; then - wget https://jdbc.postgresql.org/download/postgresql-9.4-1204.jdbc41.jar - mv postgresql-9.4-1204.jdbc41.jar $DIR/docker-files/ -fi - -docker pull predictionio/pio-testing-base -pushd $DIR/.. -if [ -z "$ES_VERSION" ]; then - ./make-distribution.sh -else - ./make-distribution.sh --with-es=$ES_VERSION -fi -sbt/sbt clean -mkdir assembly -cp dist/lib/*.jar assembly/ -mkdir -p lib/spark -cp dist/lib/spark/*.jar lib/spark -rm *.tar.gz -docker build -t predictionio/pio . -popd - -if [ "$ES_VERSION" = "1" ]; then - docker build -t predictionio/pio-testing-es1 -f $DIR/Dockerfile-es1 $DIR -else - docker build -t predictionio/pio-testing $DIR -fi http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tests/build_docker.sh ---------------------------------------------------------------------- diff --git a/tests/build_docker.sh b/tests/build_docker.sh new file mode 100755 index 0000000..13a6f6a --- /dev/null +++ b/tests/build_docker.sh @@ -0,0 +1,68 @@ +#!/bin/bash -ex + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements.  See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +docker pull predictionio/pio-testing-base + +pushd $DIR/.. + +source conf/set_build_profile.sh ${BUILD_PROFILE} +source conf/pio-vendors.sh +if [ ! -f $DIR/docker-files/${PGSQL_JAR} ]; then + wget $PGSQL_DOWNLOAD + mv ${PGSQL_JAR} $DIR/docker-files/ +fi +if [ ! -f $DIR/docker-files/${SPARK_ARCHIVE} ]; then + wget $SPARK_DOWNLOAD + mv $SPARK_ARCHIVE $DIR/docker-files/ +fi + +if [ -z "$ES_VERSION" ]; then + ./make-distribution.sh -Dbuild.profile=${BUILD_PROFILE} +else + ./make-distribution.sh --with-es=$ES_VERSION -Dbuild.profile=${BUILD_PROFILE} +fi +sbt/sbt clean +mkdir assembly +cp dist/lib/*.jar assembly/ +mkdir -p lib/spark +if [ -e dist/lib/spark/*.jar ]; then + cp dist/lib/spark/*.jar lib/spark +fi +rm *.tar.gz +docker build -t predictionio/pio . +popd + +if [ "$ES_VERSION" = "1" ]; then + docker build -t predictionio/pio-testing-es1 -f $DIR/Dockerfile-es1 $DIR \ + --build-arg SPARK_ARCHIVE=$SPARK_ARCHIVE \ + --build-arg SPARK_DIR=$SPARK_DIR \ + --build-arg PGSQL_JAR=$PGSQL_JAR \ + --build-arg BUILD_PROFILE=$BUILD_PROFILE \ + --build-arg PIO_SCALA_VERSION=$PIO_SCALA_VERSION \ + --build-arg PIO_SPARK_VERSION=$PIO_SPARK_VERSION +else + docker build -t predictionio/pio-testing $DIR \ + --build-arg SPARK_ARCHIVE=$SPARK_ARCHIVE \ + --build-arg SPARK_DIR=$SPARK_DIR \ + --build-arg PGSQL_JAR=$PGSQL_JAR \ + --build-arg BUILD_PROFILE=$BUILD_PROFILE \ + --build-arg PIO_SCALA_VERSION=$PIO_SCALA_VERSION \ + --build-arg PIO_SPARK_VERSION=$PIO_SPARK_VERSION +fi http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tests/docker-files/init.sh ---------------------------------------------------------------------- diff --git a/tests/docker-files/init.sh b/tests/docker-files/init.sh index 8dc08ea..fc12ffe 100755 --- a/tests/docker-files/init.sh +++ b/tests/docker-files/init.sh @@ -17,7 +17,7 @@ # set -e -export PYTHONPATH=/$PIO_HOME/tests:$PYTHONPATH +export PYTHONPATH=$PIO_HOME/tests:$PYTHONPATH echo "Sleeping $SLEEP_TIME seconds for all services to be ready..." sleep $SLEEP_TIME eval $@ http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tests/docker-files/set_build_profile.sh ---------------------------------------------------------------------- diff --git a/tests/docker-files/set_build_profile.sh b/tests/docker-files/set_build_profile.sh new file mode 100755 index 0000000..141dd46 --- /dev/null +++ b/tests/docker-files/set_build_profile.sh @@ -0,0 +1,31 @@ +#!/bin/bash - +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Sets versions of profile dependencies from the sbt configuration. +# e.g. Run `source ./set_build_profile.sh scala-2.11` + +set -e + +if [[ "$#" -ne 1 ]]; then + echo "Usage: set_build_profile.sh <build-profile>" + exit 1 +fi + +set -a +eval `$PIO_HOME/sbt/sbt --error 'set showSuccess := false' -Dbuild.profile=$1 printProfile | grep '.*_VERSION=.*'` +set +a http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tests/pio_tests/engines/recommendation-engine/build.sbt ---------------------------------------------------------------------- diff --git a/tests/pio_tests/engines/recommendation-engine/build.sbt b/tests/pio_tests/engines/recommendation-engine/build.sbt index 52e8742..dcb9fb6 100644 --- a/tests/pio_tests/engines/recommendation-engine/build.sbt +++ b/tests/pio_tests/engines/recommendation-engine/build.sbt @@ -19,11 +19,13 @@ import AssemblyKeys._ assemblySettings +scalaVersion in ThisBuild := sys.env.getOrElse("PIO_SCALA_VERSION", "2.10.5") + name := "template-scala-parallel-recommendation" organization := "org.apache.predictionio" libraryDependencies ++= Seq( - "org.apache.predictionio" %% "apache-predictionio-core" % pioVersion.value % "provided", - "org.apache.spark" %% "spark-core" % "1.3.0" % "provided", - "org.apache.spark" %% "spark-mllib" % "1.3.0" % "provided") + "org.apache.predictionio" %% "apache-predictionio-core" % "0.11.0-SNAPSHOT" % "provided", + "org.apache.spark" %% "spark-core" % sys.env.getOrElse("PIO_SPARK_VERSION", "1.6.2") % "provided", + "org.apache.spark" %% "spark-mllib" % sys.env.getOrElse("PIO_SPARK_VERSION", "1.6.2") % "provided") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tests/pio_tests/engines/recommendation-engine/pio.sbt ---------------------------------------------------------------------- diff --git a/tests/pio_tests/engines/recommendation-engine/pio.sbt b/tests/pio_tests/engines/recommendation-engine/pio.sbt new file mode 100644 index 0000000..5d8c366 --- /dev/null +++ b/tests/pio_tests/engines/recommendation-engine/pio.sbt @@ -0,0 +1,4 @@ +// Generated automatically by pio build. +// Changes in this file will be overridden.
+ +pioVersion := "0.11.0-SNAPSHOT" http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tests/pio_tests/scenarios/quickstart_test.py ---------------------------------------------------------------------- diff --git a/tests/pio_tests/scenarios/quickstart_test.py b/tests/pio_tests/scenarios/quickstart_test.py index 1c5f422..ab7180d 100644 --- a/tests/pio_tests/scenarios/quickstart_test.py +++ b/tests/pio_tests/scenarios/quickstart_test.py @@ -89,8 +89,8 @@ class QuickStartTest(BaseTestCase): self.app.build(engine_dir=engine_path) self.log.info("Training...") self.app.train(engine_dir=engine_path) - self.log.info("Deploying and waiting 15s for it to start...") - self.app.deploy(wait_time=15, engine_dir=engine_path) + self.log.info("Deploying and waiting 30s for it to start...") + self.app.deploy(wait_time=30, engine_dir=engine_path) self.log.info("Sending a single query and checking results") user_query = { "user": 1, "num": 4 } @@ -153,8 +153,8 @@ class QuickStartTest(BaseTestCase): self.app.build() self.log.info("Training...") self.app.train() - self.log.info("Deploying and waiting 15s for it to start...") - self.app.deploy(wait_time=15) + self.log.info("Deploying and waiting 35s for it to start...") + self.app.deploy(wait_time=35) self.log.info("Testing pio commands outside of engine directory") self.engine_dir_test() http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tests/pio_tests/utils.py ---------------------------------------------------------------------- diff --git a/tests/pio_tests/utils.py b/tests/pio_tests/utils.py index e6c5b0b..05c8d1c 100644 --- a/tests/pio_tests/utils.py +++ b/tests/pio_tests/utils.py @@ -151,10 +151,11 @@ def import_events_batch(events, test_context, appid, channel=None): try: with open(file_path, 'w') as f: f.write(contents) - srun('pio import --appid {} --input {} {}'.format( + srun('pio import --appid {} --input {} {} -- {}'.format( appid, file_path, - '--channel {}'.format(channel) if channel else '')) + '--channel {}'.format(channel) if channel else '', + '--conf spark.sql.warehouse.dir=file:///tmp/spark-warehouse')) finally: os.remove(file_path) http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tests/script.travis.sh ---------------------------------------------------------------------- diff --git a/tests/script.travis.sh b/tests/script.travis.sh index db69413..efdbb9e 100755 --- a/tests/script.travis.sh +++ b/tests/script.travis.sh @@ -17,9 +17,9 @@ # if [[ $BUILD_TYPE == Unit ]]; then - ./tests/run_docker.sh $METADATA_REP $EVENTDATA_REP $MODELDATA_REP \ - /PredictionIO/tests/unit.sh + tests/run_docker.sh $METADATA_REP $EVENTDATA_REP $MODELDATA_REP \ + "/PredictionIO/tests/unit.sh $BUILD_PROFILE" else - ./tests/run_docker.sh $METADATA_REP $EVENTDATA_REP $MODELDATA_REP \ - python3 /PredictionIO/tests/pio_tests/tests.py + tests/run_docker.sh $METADATA_REP $EVENTDATA_REP $MODELDATA_REP \ + "python3 /PredictionIO/tests/pio_tests/tests.py" fi http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tests/unit.sh ---------------------------------------------------------------------- diff --git a/tests/unit.sh b/tests/unit.sh index 324b87f..724b878 100755 --- a/tests/unit.sh +++ b/tests/unit.sh @@ -14,20 +14,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# -# Run license check pushd /PredictionIO +# Run license check ./tests/check_license.sh # Prepare pio environment variables set -a -source conf/pio-env.sh +source ./conf/pio-env.sh set +a # Run stylecheck -sbt/sbt scalastyle +sbt/sbt -Dbuild.profile=$1 scalastyle + # Run all unit tests -sbt/sbt test +sbt/sbt -Dbuild.profile=$1 test popd http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tools/build.sbt ---------------------------------------------------------------------- diff --git a/tools/build.sbt b/tools/build.sbt index 57e7d96..d1ce2b9 100644 --- a/tools/build.sbt +++ b/tools/build.sbt @@ -20,22 +20,22 @@ import sbtassembly.AssemblyPlugin.autoImport._ name := "apache-predictionio-tools" libraryDependencies ++= Seq( - "com.github.scopt" %% "scopt" % "3.2.0", + "com.github.scopt" %% "scopt" % "3.5.0", "io.spray" %% "spray-can" % "1.3.3", "io.spray" %% "spray-routing" % "1.3.3", "me.lessis" % "semverfi_2.10" % "0.1.3", - "org.apache.hadoop" % "hadoop-common" % "2.6.2", - "org.apache.hadoop" % "hadoop-hdfs" % "2.6.2", + "org.apache.hadoop" % "hadoop-common" % hadoopVersion.value, + "org.apache.hadoop" % "hadoop-hdfs" % hadoopVersion.value, "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided", "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", "org.clapper" %% "grizzled-slf4j" % "1.0.2", "org.json4s" %% "json4s-native" % json4sVersion.value, "org.json4s" %% "json4s-ext" % json4sVersion.value, "org.scalaj" %% "scalaj-http" % "1.1.6", - "org.spark-project.akka" %% "akka-actor" % "2.3.4-spark", + "com.typesafe.akka" %% "akka-actor" % akkaVersion.value, + "com.typesafe.akka" %% "akka-slf4j" % akkaVersion.value, "io.spray" %% "spray-testkit" % "1.3.3" % "test", - "org.specs2" %% "specs2" % "2.3.13" % "test", - "org.spark-project.akka" %% "akka-slf4j" % "2.3.4-spark") + "org.specs2" %% "specs2" % "2.3.13" % "test") dependencyOverrides += "org.slf4j" % "slf4j-log4j12" % "1.7.18" http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tools/src/main/scala/org/apache/predictionio/tools/admin/AdminAPI.scala ---------------------------------------------------------------------- diff --git a/tools/src/main/scala/org/apache/predictionio/tools/admin/AdminAPI.scala b/tools/src/main/scala/org/apache/predictionio/tools/admin/AdminAPI.scala index bbe39a5..7e8fd30 100644 --- a/tools/src/main/scala/org/apache/predictionio/tools/admin/AdminAPI.scala +++ b/tools/src/main/scala/org/apache/predictionio/tools/admin/AdminAPI.scala @@ -34,6 +34,7 @@ import spray.httpx.Json4sSupport import spray.routing._ import scala.concurrent.ExecutionContext +import scala.concurrent.duration.Duration class AdminServiceActor(val commandClient: CommandClient) extends HttpServiceActor { @@ -151,7 +152,7 @@ object AdminServer { } object AdminRun { - def main (args: Array[String]) { + def main (args: Array[String]) : Unit = { AdminServer.createAdminServer(AdminServerConfig( ip = "localhost", port = 7071)) http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/a8290dce/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala ---------------------------------------------------------------------- diff --git a/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala b/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala index de09cab..c101d3f 100644 --- a/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala +++ 
b/tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala @@ -21,13 +21,13 @@ package org.apache.predictionio.tools.export import org.apache.predictionio.controller.Utils import org.apache.predictionio.data.storage.EventJson4sSupport import org.apache.predictionio.data.storage.Storage +import org.apache.predictionio.data.SparkVersionDependent import org.apache.predictionio.tools.Runner import org.apache.predictionio.workflow.WorkflowContext import org.apache.predictionio.workflow.WorkflowUtils import grizzled.slf4j.Logging import org.apache.spark.sql.SaveMode -import org.apache.spark.sql.SQLContext import org.json4s.native.Serialization._ case class EventsToFileArgs( @@ -91,14 +91,14 @@ object EventsToFile extends Logging { mode = "Export", batch = "App ID " + args.appId + channelStr, executorEnv = Runner.envStringToMap(args.env)) - val sqlContext = new SQLContext(sc) + val sqlSession = SparkVersionDependent.sqlSession(sc) val events = Storage.getPEvents() val eventsRdd = events.find(appId = args.appId, channelId = channelId)(sc) val jsonStringRdd = eventsRdd.map(write(_)) if (args.format == "json") { jsonStringRdd.saveAsTextFile(args.outputPath) } else { - val jsonDf = sqlContext.read.json(jsonStringRdd) + val jsonDf = sqlSession.read.json(jsonStringRdd) jsonDf.write.mode(SaveMode.ErrorIfExists).parquet(args.outputPath) } info(s"Events are exported to ${args.outputPath}/.")
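----------------------------------------------------------------------

How the pieces above fit together, as a minimal usage sketch. This assumes a checkout of this branch with the bundled sbt launcher at sbt/sbt; the commands are the ones the diff itself wires into CI and the Docker build:

    # Show the dependency versions a profile resolves to (printProfile task in build.sbt)
    sbt/sbt -Dbuild.profile=scala-2.11 printProfile

    # Export PIO_SCALA_VERSION, PIO_SPARK_VERSION, etc. into the current shell,
    # then derive the matching vendor download URLs
    source conf/set_build_profile.sh scala-2.11
    source conf/pio-vendors.sh

    # Run unit tests and cut a binary distribution against the chosen profile
    sbt/sbt -Dbuild.profile=scala-2.11 test
    ./make-distribution.sh -Dbuild.profile=scala-2.11

Omitting -Dbuild.profile falls back to the scala-2.10 default declared in build.sbt.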
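The Spark/Scala gate inside buildProfile relies on lexicographic tuple comparison, which is what the `import scala.math.Ordering.Implicits._` at the top of build.sbt provides. A self-contained Scala sketch of that check (helper names borrowed from project/Build.scala; the object name is hypothetical):

    import scala.math.Ordering.Implicits._

    object VersionGateSketch {
      def versionMajor(v: String): Int = v.split('.')(0).toInt
      def versionMinor(v: String): Int = v.split('.')(1).toInt

      def main(args: Array[String]): Unit = {
        // (major, minor) pairs compare element by element, so Spark 1.5.2
        // falls below the (1, 6) floor and is rejected by the gate...
        assert((versionMajor("1.5.2"), versionMinor("1.5.2")) < (1, 6))
        // ...while 1.6.3 and 2.0.2 pass.
        assert((versionMajor("2.0.2"), versionMinor("2.0.2")) >= (1, 6))
      }
    }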
