Repository: incubator-predictionio Updated Branches: refs/heads/develop 8fd59fdf1 -> 31c4bd192
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala ---------------------------------------------------------------------- diff --git a/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala b/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala new file mode 100644 index 0000000..b9ec957 --- /dev/null +++ b/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.predictionio.data.storage.localfs

import java.io.File

import grizzled.slf4j.Logging
import org.apache.predictionio.data.storage.BaseStorageClient
import org.apache.predictionio.data.storage.StorageClientConfig
import org.apache.predictionio.data.storage.StorageClientException

/** Local file system storage client backed by a single directory.
  *
  * The backing directory is taken from the `PATH` property, falling back to
  * `HOSTS` for backward compatibility with older configurations. The directory
  * is created when absent; a [[StorageClientException]] is raised when it
  * cannot be created, exists but is not a directory, or is not writable.
  */
class StorageClient(val config: StorageClientConfig) extends BaseStorageClient
  with Logging {
  override val prefix = "LocalFS"

  // Prefer PATH, fall back to HOSTS. Raise a descriptive StorageClientException
  // (consistent with the other failure paths below) instead of letting a raw
  // NoSuchElementException escape when neither key is configured.
  val f = new File(
    config.properties.get("PATH").orElse(config.properties.get("HOSTS"))
      .getOrElse(throw new StorageClientException(
        "Neither PATH nor HOSTS is set for the LocalFS storage source!",
        null)))
  if (f.exists) {
    if (!f.isDirectory) throw new StorageClientException(
      s"${f} already exists but it is not a directory!",
      null)
    if (!f.canWrite) throw new StorageClientException(
      s"${f} already exists but it is not writable!",
      null)
  } else {
    if (!f.mkdirs) throw new StorageClientException(
      s"${f} does not exist and automatic creation failed!",
      null)
  }
  // The "client" for this backend is simply the validated directory handle.
  val client = f
}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package org.apache.predictionio.data.storage + +/** Local file system implementation of storage traits, supporting model data only + * + * @group Implementation + */ +package object localfs {} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/localfs/src/test/resources/application.conf ---------------------------------------------------------------------- diff --git a/storage/localfs/src/test/resources/application.conf b/storage/localfs/src/test/resources/application.conf new file mode 100644 index 0000000..eecae44 --- /dev/null +++ b/storage/localfs/src/test/resources/application.conf @@ -0,0 +1,28 @@ +org.apache.predictionio.data.storage { + sources { + mongodb { + type = mongodb + hosts = [localhost] + ports = [27017] + } + elasticsearch { + type = elasticsearch + hosts = [localhost] + ports = [9300] + } + } + repositories { + # This section is dummy just to make storage happy. + # The actual testing will not bypass these repository settings completely. + # Please refer to StorageTestUtils.scala. 
+ settings { + name = "test_predictionio" + source = mongodb + } + + appdata { + name = "test_predictionio_appdata" + source = mongodb + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/Dockerfile ---------------------------------------------------------------------- diff --git a/tests/Dockerfile b/tests/Dockerfile index d1d048a..94f5688 100644 --- a/tests/Dockerfile +++ b/tests/Dockerfile @@ -17,11 +17,13 @@ FROM predictionio/pio -ENV SPARK_VERSION 1.4.0 -ENV ELASTICSEARCH_VERSION 1.4.4 +ENV SPARK_VERSION 1.6.3 +ENV ELASTICSEARCH_VERSION 5.2.1 ENV HBASE_VERSION 1.0.0 ADD docker-files/spark-${SPARK_VERSION}-bin-hadoop2.6.tgz /vendors +# WORKAROUND: es-hadoop stops on RDD#take(1) +RUN echo "spark.locality.wait.node 0s" > /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6/conf/spark-defaults.conf ENV SPARK_HOME /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6 COPY docker-files/postgresql-9.4-1204.jdbc41.jar /drivers/postgresql-9.4-1204.jdbc41.jar @@ -31,6 +33,10 @@ COPY docker-files/env-conf/pio-env.sh ${PIO_HOME}/conf/pio-env.sh COPY docker-files/pgpass /root/.pgpass RUN chmod 600 /root/.pgpass +# Python +RUN pip install python-dateutil +RUN pip install pytz + # Default repositories setup ENV PIO_STORAGE_REPOSITORIES_METADATA_SOURCE PGSQL ENV PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE PGSQL http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/build-docker.sh ---------------------------------------------------------------------- diff --git a/tests/build-docker.sh b/tests/build-docker.sh index dd95168..459b929 100755 --- a/tests/build-docker.sh +++ b/tests/build-docker.sh @@ -17,9 +17,9 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -if [ ! -f $DIR/docker-files/spark-1.4.0-bin-hadoop2.6.tgz ]; then - wget http://d3kbcqa49mib13.cloudfront.net/spark-1.4.0-bin-hadoop2.6.tgz - mv spark-1.4.0-bin-hadoop2.6.tgz $DIR/docker-files/ +if [ ! 
-f $DIR/docker-files/spark-1.6.3-bin-hadoop2.6.tgz ]; then + wget http://d3kbcqa49mib13.cloudfront.net/spark-1.6.3-bin-hadoop2.6.tgz + mv spark-1.6.3-bin-hadoop2.6.tgz $DIR/docker-files/ fi if [ ! -f $DIR/docker-files/postgresql-9.4-1204.jdbc41.jar ]; then @@ -33,6 +33,8 @@ pushd $DIR/.. sbt/sbt clean mkdir assembly cp dist/lib/*.jar assembly/ +mkdir -p lib/spark +cp dist/lib/spark/*.jar lib/spark docker build -t predictionio/pio . popd docker build -t predictionio/pio-testing $DIR http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/docker-compose.yml ---------------------------------------------------------------------- diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml index 3939a0b..b556f7b 100644 --- a/tests/docker-compose.yml +++ b/tests/docker-compose.yml @@ -16,7 +16,7 @@ version: "2" services: elasticsearch: - image: elasticsearch:1-alpine + image: elasticsearch:5-alpine hbase: image: harisekhon/hbase:1.0 postgres: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/docker-files/env-conf/pio-env.sh ---------------------------------------------------------------------- diff --git a/tests/docker-files/env-conf/pio-env.sh b/tests/docker-files/env-conf/pio-env.sh index e1076ba..0acf3a7 100644 --- a/tests/docker-files/env-conf/pio-env.sh +++ b/tests/docker-files/env-conf/pio-env.sh @@ -87,7 +87,8 @@ PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch #PIO_STORAGE_SOURCES_ELASTICSEARCH_CLUSTERNAME=pio PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=elasticsearch -PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9300 +PIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES=http +PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200 #PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$ELASTICSEARCH_HOME # Local File System Example http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/pio_tests/scenarios/eventserver_test.py 
---------------------------------------------------------------------- diff --git a/tests/pio_tests/scenarios/eventserver_test.py b/tests/pio_tests/scenarios/eventserver_test.py index c09e815..6f29876 100644 --- a/tests/pio_tests/scenarios/eventserver_test.py +++ b/tests/pio_tests/scenarios/eventserver_test.py @@ -19,6 +19,8 @@ import unittest import requests import json import argparse +import dateutil.parser +import pytz from subprocess import Popen from utils import AppEngine, pjoin from pio_tests.integration import BaseTestCase, AppContext @@ -155,7 +157,8 @@ class EventserverTest(BaseTestCase): 'reversed': 'true' } r = self.app.get_events(params=params) self.assertEqual(5, len(r.json())) - self.assertEqual('2014-11-05T09:39:45.618-08:00', r.json()[0]['eventTime']) + event_time = dateutil.parser.parse(r.json()[0]['eventTime']).astimezone(pytz.utc) + self.assertEqual('2014-11-05 17:39:45.618000+00:00', str(event_time)) def tearDown(self): self.log.info("Deleting all app data") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/run_docker.sh ---------------------------------------------------------------------- diff --git a/tests/run_docker.sh b/tests/run_docker.sh index 9f28d1c..fe07957 100755 --- a/tests/run_docker.sh +++ b/tests/run_docker.sh @@ -19,7 +19,7 @@ USAGE=$"Usage: run_docker <meta> <event> <model> <command> Where: meta = [PGSQL,ELASTICSEARCH] - event = [PGSQL,HBASE] + event = [PGSQL,HBASE,ELASTICSEARCH] model = [PGSQL,LOCALFS,HDFS] command = command to run in the container" @@ -30,7 +30,7 @@ fi META="$1" shift -if ! [[ "$1" =~ ^(PGSQL|HBASE)$ ]]; then +if ! 
[[ "$1" =~ ^(PGSQL|HBASE|ELASTICSEARCH)$ ]]; then echo "$USAGE" exit 1 fi http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tools/src/main/scala/org/apache/predictionio/tools/Common.scala ---------------------------------------------------------------------- diff --git a/tools/src/main/scala/org/apache/predictionio/tools/Common.scala b/tools/src/main/scala/org/apache/predictionio/tools/Common.scala index 6c56615..7d04c07 100644 --- a/tools/src/main/scala/org/apache/predictionio/tools/Common.scala +++ b/tools/src/main/scala/org/apache/predictionio/tools/Common.scala @@ -102,6 +102,13 @@ object Common extends EitherLogging { if (targetFiles.size > 0) targetFiles else libFiles } + def jarFilesForSpark(pioHome: String): Array[File] = { + def jarFilesAt(path: File): Array[File] = path.listFiles filter { + _.getName.toLowerCase.endsWith(".jar") + } + jarFilesAt(new File(pioHome, "lib/spark")) + } + def coreAssembly(pioHome: String): Expected[File] = { val core = s"pio-assembly-${BuildInfo.version}.jar" val coreDir = http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala ---------------------------------------------------------------------- diff --git a/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala b/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala index 4f5a176..662dbbf 100644 --- a/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala +++ b/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala @@ -160,7 +160,8 @@ object Runner extends EitherLogging { val sparkSubmitCommand = Seq(Seq(sparkHome, "bin", "spark-submit").mkString(File.separator)) - val sparkSubmitJarsList = WorkflowUtils.thirdPartyJars ++ deployedJars + val sparkSubmitJarsList = WorkflowUtils.thirdPartyJars ++ deployedJars ++ + Common.jarFilesForSpark(pioHome).map(_.toURI) val sparkSubmitJars = if (sparkSubmitJarsList.nonEmpty) { Seq("--jars", 
sparkSubmitJarsList.map(_.toString).mkString(",")) } else { http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala ---------------------------------------------------------------------- diff --git a/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala b/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala index 4656457..69a3924 100644 --- a/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala +++ b/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala @@ -297,7 +297,9 @@ object Engine extends EitherLogging { val extraFiles = WorkflowUtils.thirdPartyConfFiles val jarFiles = jarFilesForScala(engineDirPath) jarFiles foreach { f => info(s"Found JAR: ${f.getName}") } - val allJarFiles = jarFiles.map(_.getCanonicalPath) + val jarPluginFiles = jarFilesForSpark(pioHome) + jarPluginFiles foreach { f => info(s"Found JAR: ${f.getName}") } + val allJarFiles = jarFiles.map(_.getCanonicalPath) ++ jarPluginFiles.map(_.getCanonicalPath) val cmd = s"${getSparkHome(sparkArgs.sparkHome)}/bin/spark-submit --jars " + s"${allJarFiles.mkString(",")} " +
