[PIO-97] Fixes examples of the official templates for v0.11.0-incubating. Closes #400
Project: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/commit/76f34090 Tree: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/tree/76f34090 Diff: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/diff/76f34090 Branch: refs/heads/develop Commit: 76f340900f4d72c2174ee9565593613e0ad06b3b Parents: 5c77915 Author: shimamoto <[email protected]> Authored: Mon Jul 10 13:00:32 2017 +0900 Committer: shimamoto <[email protected]> Committed: Mon Jul 10 13:00:32 2017 +0900 ---------------------------------------------------------------------- examples/.gitignore | 1 - .../scala-parallel-classification/README.md | 20 + .../add-algorithm/.gitignore | 2 +- .../add-algorithm/build.sbt | 11 +- .../add-algorithm/data/data.txt | 153 +++++ .../add-algorithm/data/import_eventserver.py | 8 +- .../add-algorithm/engine.json | 6 +- .../add-algorithm/project/assembly.sbt | 2 +- .../add-algorithm/project/build.properties | 1 + .../add-algorithm/project/pio-build.sbt | 1 - .../src/main/scala/CompleteEvaluation.scala | 30 + .../src/main/scala/DataSource.scala | 76 ++- .../add-algorithm/src/main/scala/Engine.scala | 14 +- .../src/main/scala/Evaluation.scala | 53 ++ .../src/main/scala/NaiveBayesAlgorithm.scala | 22 +- .../src/main/scala/PrecisionEvaluation.scala | 44 ++ .../src/main/scala/Preparator.scala | 3 +- .../src/main/scala/RandomForestAlgorithm.scala | 8 +- .../add-algorithm/src/main/scala/Serving.scala | 2 +- .../add-algorithm/template.json | 1 + .../custom-attributes/.gitignore | 4 - .../custom-attributes/README.md | 196 ------ .../custom-attributes/build.sbt | 31 - .../data/import_eventserver.py | 61 -- .../data/sample_random_forest_data.txt | 8 - .../custom-attributes/engine.json | 23 - .../custom-attributes/project/assembly.sbt | 1 - .../src/main/scala/DataSource.scala | 83 --- .../src/main/scala/Engine.scala | 41 -- .../src/main/scala/Preparator.scala | 41 -- 
.../src/main/scala/RandomForestAlgorithm.scala | 84 --- .../src/main/scala/Serving.scala | 29 - .../reading-custom-properties/.gitignore | 5 + .../reading-custom-properties/build.sbt | 24 + .../reading-custom-properties/data/data.txt | 153 +++++ .../data/import_eventserver.py | 64 ++ .../reading-custom-properties/engine.json | 18 + .../project/assembly.sbt | 1 + .../project/build.properties | 1 + .../src/main/scala/CompleteEvaluation.scala | 30 + .../src/main/scala/DataSource.scala | 138 ++++ .../src/main/scala/Engine.scala | 47 ++ .../src/main/scala/Evaluation.scala | 53 ++ .../src/main/scala/NaiveBayesAlgorithm.scala | 58 ++ .../src/main/scala/PrecisionEvaluation.scala | 44 ++ .../src/main/scala/Preparator.scala | 35 + .../src/main/scala/Serving.scala | 29 + .../reading-custom-properties/template.json | 1 + .../README.md | 20 + .../adjust-score/.gitignore | 4 + .../adjust-score/build.sbt | 24 + .../adjust-score/data/import_eventserver.py | 101 +++ .../adjust-score/data/send_query.py | 24 + .../adjust-score/engine.json | 25 + .../adjust-score/project/assembly.sbt | 1 + .../adjust-score/project/build.properties | 1 + .../src/main/scala/DataSource.scala | 150 +++++ .../src/main/scala/ECommAlgorithm.scala | 647 +++++++++++++++++++ .../adjust-score/src/main/scala/Engine.scala | 48 ++ .../src/main/scala/Preparator.scala | 43 ++ .../adjust-score/src/main/scala/Serving.scala | 30 + .../adjust-score/template.json | 1 + .../train-with-rate-event/README.md | 40 -- .../train-with-rate-event/build.sbt | 11 +- .../data/import_eventserver.py | 18 +- .../train-with-rate-event/data/send_query.py | 2 +- .../train-with-rate-event/engine.json | 9 +- .../train-with-rate-event/pio.sbt | 4 - .../train-with-rate-event/project/assembly.sbt | 2 +- .../project/build.properties | 1 + .../train-with-rate-event/project/pio-build.sbt | 1 - .../src/main/scala/ALSAlgorithm.scala | 453 ------------- .../src/main/scala/DataSource.scala | 77 ++- .../src/main/scala/ECommAlgorithm.scala | 597 
+++++++++++++++++ .../src/main/scala/Engine.scala | 14 +- .../src/main/scala/Preparator.scala | 8 +- .../src/main/scala/Serving.scala | 2 +- .../train-with-rate-event/template.json | 2 +- .../weighted-items/.gitignore | 4 - .../weighted-items/README.md | 229 ------- .../weighted-items/build.sbt | 29 - .../weighted-items/data/import_eventserver.py | 101 --- .../weighted-items/data/send_query.py | 24 - .../weighted-items/data/set_weights.py | 56 -- .../weighted-items/engine.json | 24 - .../weighted-items/project/assembly.sbt | 1 - .../weighted-items/project/pio-build.sbt | 1 - .../src/main/scala/ALSAlgorithm.scala | 482 -------------- .../src/main/scala/DataSource.scala | 131 ---- .../weighted-items/src/main/scala/Engine.scala | 48 -- .../src/main/scala/Preparator.scala | 41 -- .../weighted-items/src/main/scala/Serving.scala | 30 - .../weighted-items/template.json | 1 - .../scala-parallel-recommendation/README.md | 20 + .../blacklist-items/build.sbt | 24 + .../blacklist-items/data/import_eventserver.py | 73 +++ .../blacklist-items/data/send_query.py | 24 + .../blacklist-items/engine.json | 21 + .../blacklist-items/project/assembly.sbt | 1 + .../blacklist-items/project/build.properties | 1 + .../src/main/scala/ALSAlgorithm.scala | 157 +++++ .../src/main/scala/ALSModel.scala | 100 +++ .../src/main/scala/DataSource.scala | 120 ++++ .../blacklist-items/src/main/scala/Engine.scala | 50 ++ .../src/main/scala/Evaluation.scala | 105 +++ .../src/main/scala/Preparator.scala | 36 ++ .../src/main/scala/Serving.scala | 30 + .../blacklist-items/template.json | 1 + .../custom-prepartor/.gitignore | 5 - .../custom-prepartor/build.sbt | 29 - .../custom-prepartor/data/import_eventserver.py | 73 --- .../data/sample_not_train_data.txt | 8 - .../custom-prepartor/data/send_query.py | 24 - .../custom-prepartor/engine.json | 26 - .../custom-prepartor/project/assembly.sbt | 1 - .../custom-prepartor/project/pio-build.sbt | 1 - .../src/main/scala/ALSAlgorithm.scala | 104 --- 
.../src/main/scala/ALSModel.scala | 80 --- .../src/main/scala/DataSource.scala | 86 --- .../src/main/scala/Engine.scala | 45 -- .../src/main/scala/Preparator.scala | 48 -- .../src/main/scala/Serving.scala | 29 - .../custom-query/.gitignore | 4 - .../custom-query/README.md | 161 ----- .../custom-query/build.sbt | 31 - .../custom-query/data/build.sbt | 8 - .../recommendation/ImportDataScript.scala | 108 ---- .../custom-query/engine.json | 21 - .../custom-query/project/assembly.sbt | 1 - .../src/main/scala/ALSAlgorithm.scala | 167 ----- .../custom-query/src/main/scala/ALSModel.scala | 67 -- .../src/main/scala/DataSource.scala | 99 --- .../custom-query/src/main/scala/Engine.scala | 36 -- .../src/main/scala/Preparator.scala | 33 - .../custom-query/src/main/scala/Serving.scala | 32 - .../custom-serving/.gitignore | 5 - .../custom-serving/build.sbt | 29 - .../data/sample_disabled_items.txt | 0 .../custom-serving/engine.json | 26 - .../custom-serving/project/assembly.sbt | 1 - .../custom-serving/project/pio-build.sbt | 1 - .../src/main/scala/ALSAlgorithm.scala | 104 --- .../src/main/scala/ALSModel.scala | 80 --- .../src/main/scala/DataSource.scala | 86 --- .../custom-serving/src/main/scala/Engine.scala | 45 -- .../src/main/scala/Preparator.scala | 36 -- .../custom-serving/src/main/scala/Serving.scala | 43 -- .../customize-data-prep/.gitignore | 5 + .../customize-data-prep/build.sbt | 24 + .../data/import_eventserver.py | 73 +++ .../data/sample_not_train_data.txt | 8 + .../customize-data-prep/data/send_query.py | 24 + .../customize-data-prep/engine.json | 26 + .../customize-data-prep/project/assembly.sbt | 1 + .../project/build.properties | 1 + .../src/main/scala/ALSAlgorithm.scala | 155 +++++ .../src/main/scala/ALSModel.scala | 80 +++ .../src/main/scala/DataSource.scala | 120 ++++ .../src/main/scala/Engine.scala | 49 ++ .../src/main/scala/Evaluation.scala | 105 +++ .../src/main/scala/Preparator.scala | 48 ++ .../src/main/scala/Serving.scala | 30 + 
.../customize-data-prep/template.json | 1 + .../customize-serving/.gitignore | 5 + .../customize-serving/build.sbt | 24 + .../data/import_eventserver.py | 73 +++ .../data/sample_disabled_items.txt | 0 .../customize-serving/data/send_query.py | 24 + .../customize-serving/engine.json | 26 + .../customize-serving/project/assembly.sbt | 1 + .../customize-serving/project/build.properties | 1 + .../src/main/scala/ALSAlgorithm.scala | 155 +++++ .../src/main/scala/ALSModel.scala | 80 +++ .../src/main/scala/DataSource.scala | 120 ++++ .../src/main/scala/Engine.scala | 49 ++ .../src/main/scala/Evaluation.scala | 105 +++ .../src/main/scala/Preparator.scala | 36 ++ .../src/main/scala/Serving.scala | 43 ++ .../customize-serving/template.json | 1 + .../filter-by-category/README.md | 202 ------ .../filter-by-category/build.sbt | 29 - .../data/import_eventserver.py | 98 --- .../filter-by-category/data/send_query.py | 24 - .../filter-by-category/engine.json | 21 - .../filter-by-category/project/assembly.sbt | 1 - .../filter-by-category/project/pio-build.sbt | 1 - .../src/main/scala/ALSAlgorithm.scala | 118 ---- .../src/main/scala/ALSModel.scala | 98 --- .../src/main/scala/DataSource.scala | 110 ---- .../src/main/scala/Engine.scala | 46 -- .../src/main/scala/Preparator.scala | 37 -- .../src/main/scala/Serving.scala | 30 - .../reading-custom-events/.gitignore | 5 + .../reading-custom-events/build.sbt | 24 + .../data/import_eventserver.py | 72 +++ .../reading-custom-events/data/send_query.py | 24 + .../reading-custom-events/engine.json | 21 + .../reading-custom-events/project/assembly.sbt | 1 + .../project/build.properties | 1 + .../src/main/scala/ALSAlgorithm.scala | 155 +++++ .../src/main/scala/ALSModel.scala | 80 +++ .../src/main/scala/DataSource.scala | 121 ++++ .../src/main/scala/Engine.scala | 49 ++ .../src/main/scala/Evaluation.scala | 105 +++ .../src/main/scala/Preparator.scala | 36 ++ .../src/main/scala/Serving.scala | 30 + .../reading-custom-events/template.json | 1 + 
.../train-with-view-event/.gitignore | 5 + .../train-with-view-event/build.sbt | 24 + .../data/import_eventserver.py | 81 +++ .../train-with-view-event/data/send_query.py | 24 + .../train-with-view-event/engine.json | 21 + .../train-with-view-event/project/assembly.sbt | 1 + .../project/build.properties | 1 + .../src/main/scala/ALSAlgorithm.scala | 156 +++++ .../src/main/scala/ALSModel.scala | 80 +++ .../src/main/scala/DataSource.scala | 124 ++++ .../src/main/scala/Engine.scala | 49 ++ .../src/main/scala/Evaluation.scala | 105 +++ .../src/main/scala/Preparator.scala | 36 ++ .../src/main/scala/Serving.scala | 30 + .../train-with-view-event/template.json | 1 + .../scala-parallel-similarproduct/README.md | 20 + .../add-and-return-item-properties/.gitignore | 4 - .../add-and-return-item-properties/README.md | 205 ------ .../add-and-return-item-properties/build.sbt | 29 - .../data/import_eventserver.py | 93 --- .../data/send_query.py | 24 - .../add-and-return-item-properties/engine.json | 21 - .../project/assembly.sbt | 1 - .../project/pio-build.sbt | 1 - .../src/main/scala/ALSAlgorithm.scala | 259 -------- .../src/main/scala/DataSource.scala | 139 ---- .../src/main/scala/Engine.scala | 51 -- .../src/main/scala/Preparator.scala | 41 -- .../src/main/scala/Serving.scala | 30 - .../add-rateevent/README.md | 156 ----- .../add-rateevent/build.sbt | 29 - .../add-rateevent/data/import_eventserver.py | 90 --- .../add-rateevent/data/send_query.py | 24 - .../add-rateevent/engine.json | 21 - .../add-rateevent/project/assembly.sbt | 1 - .../add-rateevent/project/pio-build.sbt | 1 - .../src/main/scala/ALSAlgorithm.scala | 262 -------- .../src/main/scala/DataSource.scala | 132 ---- .../add-rateevent/src/main/scala/Engine.scala | 48 -- .../src/main/scala/Preparator.scala | 41 -- .../add-rateevent/src/main/scala/Serving.scala | 30 - .../add-rateevent/template.json | 1 - .../filterbyyear/.gitignore | 3 - .../filterbyyear/README.md | 150 ----- .../filterbyyear/build.sbt | 29 - 
.../filterbyyear/data/import_eventserver.py | 90 --- .../filterbyyear/data/send_query.py | 24 - .../filterbyyear/engine.json | 26 - .../filterbyyear/project/assembly.sbt | 1 - .../src/main/scala/ALSAlgorithm.scala | 258 -------- .../src/main/scala/DataSource.scala | 139 ---- .../filterbyyear/src/main/scala/Engine.scala | 50 -- .../src/main/scala/Preparator.scala | 41 -- .../filterbyyear/src/main/scala/Serving.scala | 29 - .../multi-events-multi-algos/.gitignore | 4 + .../multi-events-multi-algos/build.sbt | 24 + .../data/import_eventserver.py | 113 ++++ .../multi-events-multi-algos/data/send_query.py | 24 + .../engine-cooccurrence.json | 18 + .../multi-events-multi-algos/engine.json | 30 + .../project/assembly.sbt | 1 + .../project/build.properties | 1 + .../src/main/scala/ALSAlgorithm.scala | 263 ++++++++ .../src/main/scala/CooccurrenceAlgorithm.scala | 175 +++++ .../src/main/scala/DataSource.scala | 170 +++++ .../src/main/scala/Engine.scala | 54 ++ .../src/main/scala/LikeAlgorithm.scala | 117 ++++ .../src/main/scala/Preparator.scala | 43 ++ .../src/main/scala/Serving.scala | 70 ++ .../multi-events-multi-algos/template.json | 1 + .../multi/.gitignore | 5 - .../multi/README.md | 20 - .../multi/build.sbt | 29 - .../multi/data/import_eventserver.py | 113 ---- .../multi/data/send_query.py | 24 - .../multi/engine.json | 30 - .../multi/project/assembly.sbt | 1 - .../multi/project/pio-build.sbt | 1 - .../multi/src/main/scala/ALSAlgorithm.scala | 282 -------- .../multi/src/main/scala/DataSource.scala | 171 ----- .../multi/src/main/scala/Engine.scala | 51 -- .../multi/src/main/scala/LikeAlgorithm.scala | 119 ---- .../multi/src/main/scala/Preparator.scala | 43 -- .../multi/src/main/scala/Serving.scala | 70 -- .../no-set-user/.gitignore | 4 - .../no-set-user/README.md | 141 ---- .../no-set-user/build.sbt | 29 - .../no-set-user/data/import_eventserver.py | 82 --- .../no-set-user/data/send_query.py | 24 - .../no-set-user/engine.json | 21 - .../no-set-user/project/assembly.sbt 
| 1 - .../no-set-user/project/pio-build.sbt | 1 - .../src/main/scala/ALSAlgorithm.scala | 251 ------- .../no-set-user/src/main/scala/DataSource.scala | 109 ---- .../no-set-user/src/main/scala/Engine.scala | 48 -- .../no-set-user/src/main/scala/Preparator.scala | 39 -- .../no-set-user/src/main/scala/Serving.scala | 29 - .../no-set-user/template.json | 1 - .../recommended-user/.gitignore | 5 +- .../recommended-user/README.md | 232 ------- .../recommended-user/build.sbt | 9 +- .../recommended-user/data/import_eventserver.py | 12 +- .../recommended-user/data/send_query.py | 2 +- .../recommended-user/engine.json | 4 +- .../recommended-user/project/assembly.sbt | 2 +- .../recommended-user/project/build.properties | 1 + .../recommended-user/project/pio-build.sbt | 1 - .../src/main/scala/ALSAlgorithm.scala | 4 +- .../src/main/scala/DataSource.scala | 18 +- .../src/main/scala/Engine.scala | 14 +- .../src/main/scala/Preparator.scala | 2 +- .../src/main/scala/Serving.scala | 2 +- .../recommended-user/template.json | 2 +- .../return-item-properties/.gitignore | 4 + .../return-item-properties/build.sbt | 24 + .../data/import_eventserver.py | 93 +++ .../return-item-properties/data/send_query.py | 24 + .../engine-cooccurrence.json | 18 + .../return-item-properties/engine.json | 21 + .../return-item-properties/project/assembly.sbt | 1 + .../project/build.properties | 1 + .../src/main/scala/ALSAlgorithm.scala | 268 ++++++++ .../src/main/scala/CooccurrenceAlgorithm.scala | 180 ++++++ .../src/main/scala/DataSource.scala | 140 ++++ .../src/main/scala/Engine.scala | 57 ++ .../src/main/scala/Preparator.scala | 41 ++ .../src/main/scala/Serving.scala | 30 + .../return-item-properties/template.json | 1 + .../rid-user-set-event/.gitignore | 4 + .../rid-user-set-event/build.sbt | 24 + .../data/import_eventserver.py | 82 +++ .../rid-user-set-event/data/send_query.py | 24 + .../rid-user-set-event/engine-cooccurrence.json | 18 + .../rid-user-set-event/engine.json | 21 + 
.../rid-user-set-event/project/assembly.sbt | 1 + .../rid-user-set-event/project/build.properties | 1 + .../src/main/scala/ALSAlgorithm.scala | 259 ++++++++ .../src/main/scala/CooccurrenceAlgorithm.scala | 175 +++++ .../src/main/scala/DataSource.scala | 108 ++++ .../src/main/scala/Engine.scala | 53 ++ .../src/main/scala/Preparator.scala | 39 ++ .../src/main/scala/Serving.scala | 30 + .../rid-user-set-event/template.json | 1 + .../train-with-rate-event/build.sbt | 24 + .../data/import_eventserver.py | 105 +++ .../train-with-rate-event/data/send_query.py | 24 + .../engine-cooccurrence.json | 18 + .../train-with-rate-event/engine.json | 21 + .../train-with-rate-event/project/assembly.sbt | 1 + .../project/build.properties | 1 + .../src/main/scala/ALSAlgorithm.scala | 271 ++++++++ .../src/main/scala/CooccurrenceAlgorithm.scala | 176 +++++ .../src/main/scala/DataSource.scala | 133 ++++ .../src/main/scala/Engine.scala | 53 ++ .../src/main/scala/Preparator.scala | 41 ++ .../src/main/scala/Serving.scala | 30 + .../train-with-rate-event/template.json | 1 + 364 files changed, 10441 insertions(+), 9592 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/.gitignore ---------------------------------------------------------------------- diff --git a/examples/.gitignore b/examples/.gitignore deleted file mode 100644 index 2ddf5f2..0000000 --- a/examples/.gitignore +++ /dev/null @@ -1 +0,0 @@ -manifest.json http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/README.md ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/README.md b/examples/scala-parallel-classification/README.md new file mode 100644 index 0000000..2ec7d22 --- /dev/null +++ b/examples/scala-parallel-classification/README.md @@ -0,0 +1,20 @@ +<!-- +Licensed to the 
Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--> + +This is based on Classification Engine Template v0.11.0-incubating. + +Please refer to http://predictionio.incubator.apache.org/templates/classification/how-to/ \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/.gitignore ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/.gitignore b/examples/scala-parallel-classification/add-algorithm/.gitignore index 7c6771b..9917afe 100644 --- a/examples/scala-parallel-classification/add-algorithm/.gitignore +++ b/examples/scala-parallel-classification/add-algorithm/.gitignore @@ -1,5 +1,5 @@ -data/*.txt manifest.json pio.log /pio.sbt target/ +.idea http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/build.sbt ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/build.sbt b/examples/scala-parallel-classification/add-algorithm/build.sbt index 98bf136..f5fb250 100644 --- a/examples/scala-parallel-classification/add-algorithm/build.sbt +++ 
b/examples/scala-parallel-classification/add-algorithm/build.sbt @@ -15,15 +15,10 @@ * limitations under the License. */ -import AssemblyKeys._ - -assemblySettings - name := "template-scala-parallel-classification" organization := "org.apache.predictionio" - +scalaVersion := "2.11.8" libraryDependencies ++= Seq( - "org.apache.predictionio" %% "core" % pioVersion.value % "provided", - "org.apache.spark" %% "spark-core" % "1.2.0" % "provided", - "org.apache.spark" %% "spark-mllib" % "1.2.0" % "provided") + "org.apache.predictionio" %% "apache-predictionio-core" % "0.11.0-incubating" % "provided", + "org.apache.spark" %% "spark-mllib" % "2.1.1" % "provided") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/data/data.txt ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/data/data.txt b/examples/scala-parallel-classification/add-algorithm/data/data.txt new file mode 100644 index 0000000..c08ad7c --- /dev/null +++ b/examples/scala-parallel-classification/add-algorithm/data/data.txt @@ -0,0 +1,153 @@ +0,51 35 12 +0,49 30 12 +0,47 32 12 +0,46 31 12 +0,50 36 12 +0,54 39 14 +0,46 34 13 +0,50 34 12 +0,44 29 12 +0,49 31 11 +0,54 37 12 +0,48 34 12 +0,48 30 11 +0,43 30 11 +0,58 40 12 +0,57 44 14 +0,54 39 14 +0,51 35 13 +0,57 38 13 +0,51 38 13 +0,54 34 12 +0,51 37 14 +0,46 36 12 +0,51 33 15 +0,48 34 12 +0,50 30 12 +0,50 34 14 +0,52 35 12 +0,52 34 12 +0,47 32 12 +0,48 31 12 +0,54 34 14 +0,52 41 11 +0,55 42 12 +0,49 31 11 +0,50 32 12 +0,55 35 12 +0,49 31 11 +0,44 30 12 +0,51 34 12 +0,50 35 13 +0,45 23 13 +0,44 32 12 +0,50 35 16 +0,51 38 14 +0,48 30 13 +0,51 38 12 +0,46 32 12 +0,53 37 12 +0,50 33 12 +1,70 32 14 +1,64 32 15 +1,69 31 15 +1,55 23 13 +1,65 28 15 +1,57 28 13 +1,63 33 16 +1,49 24 10 +1,66 29 13 +1,52 27 14 +1,50 20 10 +1,59 30 15 +1,60 22 10 +1,61 29 14 +1,56 29 13 +1,67 31 14 +1,56 30 15 +1,58 27 10 +1,62 
22 15 +1,56 25 11 +1,59 32 18 +1,61 28 13 +1,63 25 15 +1,61 28 12 +1,64 29 13 +1,66 30 14 +1,68 28 14 +1,67 30 17 +1,60 29 15 +1,57 26 10 +1,55 24 11 +1,55 24 10 +1,58 27 12 +1,60 27 16 +1,54 30 15 +1,60 34 16 +1,67 31 15 +1,63 23 13 +1,56 30 13 +1,55 25 13 +1,55 26 12 +1,61 30 14 +1,58 26 12 +1,50 23 10 +1,56 27 13 +1,57 30 12 +1,57 29 13 +1,62 29 13 +1,51 25 11 +1,57 28 13 +2,63 33 25 +2,58 27 19 +2,71 30 21 +2,63 29 18 +2,65 30 22 +2,76 30 21 +2,49 25 17 +2,73 29 18 +2,67 25 18 +2,72 36 25 +2,65 32 20 +2,64 27 19 +2,68 30 21 +2,57 25 20 +2,58 28 24 +2,64 32 23 +2,65 30 18 +2,77 38 22 +2,77 26 23 +2,60 22 15 +2,69 32 23 +2,56 28 20 +2,77 28 20 +2,63 27 18 +2,67 33 21 +2,72 32 18 +2,62 28 18 +2,61 30 18 +2,64 28 21 +2,72 30 16 +2,74 28 19 +2,79 38 20 +2,64 28 22 +2,63 28 15 +2,61 26 14 +2,77 30 23 +2,63 34 24 +2,64 31 18 +2,60 30 18 +2,69 31 21 +2,67 31 24 +2,69 31 23 +2,58 27 19 +2,68 32 23 +2,67 33 25 +2,67 30 23 +2,63 25 19 +2,65 30 20 +2,62 34 23 +2,59 30 18 +3,80 10 70 +3,82 20 71 +3,90 15 73 http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/data/import_eventserver.py ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/data/import_eventserver.py b/examples/scala-parallel-classification/add-algorithm/data/import_eventserver.py index 1c044c7..f96b1ee 100644 --- a/examples/scala-parallel-classification/add-algorithm/data/import_eventserver.py +++ b/examples/scala-parallel-classification/add-algorithm/data/import_eventserver.py @@ -25,7 +25,7 @@ import argparse def import_events(client, file): f = open(file, 'r') count = 0 - print "Importing data..." + print("Importing data...") for line in f: data = line.rstrip('\r\n').split(",") plan = data[0] @@ -43,17 +43,17 @@ def import_events(client, file): ) count += 1 f.close() - print "%s events are imported." % count + print("%s events are imported." 
% count) if __name__ == '__main__': parser = argparse.ArgumentParser( description="Import sample data for classification engine") parser.add_argument('--access_key', default='invald_access_key') parser.add_argument('--url', default="http://localhost:7070") - parser.add_argument('--file', default="./data/sample_naive_bayes_data.txt") + parser.add_argument('--file', default="./data/data.txt") args = parser.parse_args() - print args + print(args) client = predictionio.EventClient( access_key=args.access_key, http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/engine.json ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/engine.json b/examples/scala-parallel-classification/add-algorithm/engine.json index 686d680..6aa4bb7 100644 --- a/examples/scala-parallel-classification/add-algorithm/engine.json +++ b/examples/scala-parallel-classification/add-algorithm/engine.json @@ -1,17 +1,17 @@ { "id": "default", "description": "Default settings", - "engineFactory": "org.template.classification.ClassificationEngine", + "engineFactory": "org.apache.predictionio.examples.classification.ClassificationEngine", "datasource": { "params": { - "appId": 2 + "appName": "MyApp1" } }, "algorithms": [ { "name": "randomforest", "params": { - "numClasses": 3, + "numClasses": 4, "numTrees": 5, "featureSubsetStrategy": "auto", "impurity": "gini", http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/project/assembly.sbt ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/project/assembly.sbt b/examples/scala-parallel-classification/add-algorithm/project/assembly.sbt index 54c3252..e17409e 100644 --- a/examples/scala-parallel-classification/add-algorithm/project/assembly.sbt +++ 
b/examples/scala-parallel-classification/add-algorithm/project/assembly.sbt @@ -1 +1 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.4") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/project/build.properties ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/project/build.properties b/examples/scala-parallel-classification/add-algorithm/project/build.properties new file mode 100644 index 0000000..cf19fd0 --- /dev/null +++ b/examples/scala-parallel-classification/add-algorithm/project/build.properties @@ -0,0 +1 @@ +sbt.version=0.13.15 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/project/pio-build.sbt ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/project/pio-build.sbt b/examples/scala-parallel-classification/add-algorithm/project/pio-build.sbt deleted file mode 100644 index 9aed0ee..0000000 --- a/examples/scala-parallel-classification/add-algorithm/project/pio-build.sbt +++ /dev/null @@ -1 +0,0 @@ -addSbtPlugin("org.apache.predictionio" % "pio-build" % "0.9.0") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/src/main/scala/CompleteEvaluation.scala ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/src/main/scala/CompleteEvaluation.scala b/examples/scala-parallel-classification/add-algorithm/src/main/scala/CompleteEvaluation.scala new file mode 100644 index 0000000..ea997e2 --- /dev/null +++ 
b/examples/scala-parallel-classification/add-algorithm/src/main/scala/CompleteEvaluation.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.predictionio.examples.classification + +import org.apache.predictionio.controller.Evaluation +import org.apache.predictionio.controller.MetricEvaluator + +object CompleteEvaluation extends Evaluation { + engineEvaluator = ( + ClassificationEngine(), + MetricEvaluator( + metric = Accuracy(), + otherMetrics = Seq(Precision(0.0), Precision(1.0), Precision(2.0)), + outputPath = "best.json")) +} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/src/main/scala/DataSource.scala ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/src/main/scala/DataSource.scala b/examples/scala-parallel-classification/add-algorithm/src/main/scala/DataSource.scala index 7ac2367..d800282 100644 --- a/examples/scala-parallel-classification/add-algorithm/src/main/scala/DataSource.scala +++ b/examples/scala-parallel-classification/add-algorithm/src/main/scala/DataSource.scala @@ -15,36 +15,36 @@ * 
limitations under the License. */ -package org.template.classification +package org.apache.predictionio.examples.classification import org.apache.predictionio.controller.PDataSource import org.apache.predictionio.controller.EmptyEvaluationInfo -import org.apache.predictionio.controller.EmptyActualResult import org.apache.predictionio.controller.Params -import org.apache.predictionio.data.storage.Event -import org.apache.predictionio.data.storage.Storage +import org.apache.predictionio.data.store.PEventStore import org.apache.spark.SparkContext -import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.linalg.Vectors import grizzled.slf4j.Logger -case class DataSourceParams(appId: Int) extends Params +case class DataSourceParams( + appName: String, + evalK: Option[Int] // define the k-fold parameter. +) extends Params class DataSource(val dsp: DataSourceParams) extends PDataSource[TrainingData, - EmptyEvaluationInfo, Query, EmptyActualResult] { + EmptyEvaluationInfo, Query, ActualResult] { @transient lazy val logger = Logger[this.type] override def readTraining(sc: SparkContext): TrainingData = { - val eventsDb = Storage.getPEvents() - val labeledPoints: RDD[LabeledPoint] = eventsDb.aggregateProperties( - appId = dsp.appId, + + val labeledPoints: RDD[LabeledPoint] = PEventStore.aggregateProperties( + appName = dsp.appName, entityType = "user", // only keep entities with these required properties defined required = Some(List("plan", "attr0", "attr1", "attr2")))(sc) @@ -66,10 +66,64 @@ class DataSource(val dsp: DataSourceParams) throw e } } - } + }.cache() new TrainingData(labeledPoints) } + + override + def readEval(sc: SparkContext) + : Seq[(TrainingData, EmptyEvaluationInfo, RDD[(Query, ActualResult)])] = { + require(dsp.evalK.nonEmpty, "DataSourceParams.evalK must not be None") + + // The following code reads the data from data store. 
It is equivalent to + // the readTraining method. We copy-and-paste the exact code here for + // illustration purpose, a recommended approach is to factor out this logic + // into a helper function and have both readTraining and readEval call the + // helper. + val labeledPoints: RDD[LabeledPoint] = PEventStore.aggregateProperties( + appName = dsp.appName, + entityType = "user", + // only keep entities with these required properties defined + required = Some(List("plan", "attr0", "attr1", "attr2")))(sc) + // aggregateProperties() returns RDD pair of + // entity ID and its aggregated properties + .map { case (entityId, properties) => + try { + LabeledPoint(properties.get[Double]("plan"), + Vectors.dense(Array( + properties.get[Double]("attr0"), + properties.get[Double]("attr1"), + properties.get[Double]("attr2") + )) + ) + } catch { + case e: Exception => { + logger.error(s"Failed to get properties ${properties} of" + + s" ${entityId}. Exception: ${e}.") + throw e + } + } + }.cache() + // End of reading from data store + + // K-fold splitting + val evalK = dsp.evalK.get + val indexedPoints: RDD[(LabeledPoint, Long)] = labeledPoints.zipWithIndex() + + (0 until evalK).map { idx => + val trainingPoints = indexedPoints.filter(_._2 % evalK != idx).map(_._1) + val testingPoints = indexedPoints.filter(_._2 % evalK == idx).map(_._1) + + ( + new TrainingData(trainingPoints), + new EmptyEvaluationInfo(), + testingPoints.map { + p => (new Query(p.features(0), p.features(1), p.features(2)), new ActualResult(p.label)) + } + ) + } + } } class TrainingData( http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/src/main/scala/Engine.scala ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/src/main/scala/Engine.scala b/examples/scala-parallel-classification/add-algorithm/src/main/scala/Engine.scala index 5824534..1e26024 
100644 --- a/examples/scala-parallel-classification/add-algorithm/src/main/scala/Engine.scala +++ b/examples/scala-parallel-classification/add-algorithm/src/main/scala/Engine.scala @@ -15,20 +15,26 @@ * limitations under the License. */ -package org.template.classification +package org.apache.predictionio.examples.classification -import org.apache.predictionio.controller.IEngineFactory +import org.apache.predictionio.controller.EngineFactory import org.apache.predictionio.controller.Engine class Query( - val features: Array[Double] + val attr0 : Double, + val attr1 : Double, + val attr2 : Double ) extends Serializable class PredictedResult( val label: Double ) extends Serializable -object ClassificationEngine extends IEngineFactory { +class ActualResult( + val label: Double +) extends Serializable + +object ClassificationEngine extends EngineFactory { def apply() = { new Engine( classOf[DataSource], http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/src/main/scala/Evaluation.scala ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/src/main/scala/Evaluation.scala b/examples/scala-parallel-classification/add-algorithm/src/main/scala/Evaluation.scala new file mode 100644 index 0000000..3bc3399 --- /dev/null +++ b/examples/scala-parallel-classification/add-algorithm/src/main/scala/Evaluation.scala @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.predictionio.examples.classification + +import org.apache.predictionio.controller.AverageMetric +import org.apache.predictionio.controller.EmptyEvaluationInfo +import org.apache.predictionio.controller.EngineParams +import org.apache.predictionio.controller.EngineParamsGenerator +import org.apache.predictionio.controller.Evaluation + +case class Accuracy() + extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] { + def calculate(query: Query, predicted: PredictedResult, actual: ActualResult) + : Double = (if (predicted.label == actual.label) 1.0 else 0.0) +} + +object AccuracyEvaluation extends Evaluation { + // Define Engine and Metric used in Evaluation + engineMetric = (ClassificationEngine(), new Accuracy()) +} + +object EngineParamsList extends EngineParamsGenerator { + // Define list of EngineParams used in Evaluation + + // First, we define the base engine params. It specifies the appName from which + // the data is read, and an evalK parameter is used to define the + // cross-validation. + private[this] val baseEP = EngineParams( + dataSourceParams = DataSourceParams(appName = "INVALID_APP_NAME", evalK = Some(5))) + + // Second, we specify the engine params list by explicitly listing all + // algorithm parameters. In this case, we evaluate 3 engine params, each with + // a different algorithm params value.
+ engineParamsList = Seq( + baseEP.copy(algorithmParamsList = Seq(("naive", AlgorithmParams(10.0)))), + baseEP.copy(algorithmParamsList = Seq(("naive", AlgorithmParams(100.0)))), + baseEP.copy(algorithmParamsList = Seq(("naive", AlgorithmParams(1000.0))))) +} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/src/main/scala/NaiveBayesAlgorithm.scala ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/src/main/scala/NaiveBayesAlgorithm.scala b/examples/scala-parallel-classification/add-algorithm/src/main/scala/NaiveBayesAlgorithm.scala index 8c06040..ce7bda0 100644 --- a/examples/scala-parallel-classification/add-algorithm/src/main/scala/NaiveBayesAlgorithm.scala +++ b/examples/scala-parallel-classification/add-algorithm/src/main/scala/NaiveBayesAlgorithm.scala @@ -15,30 +15,42 @@ * limitations under the License. */ -package org.template.classification +package org.apache.predictionio.examples.classification import org.apache.predictionio.controller.P2LAlgorithm import org.apache.predictionio.controller.Params -import org.apache.spark.SparkContext import org.apache.spark.mllib.classification.NaiveBayes import org.apache.spark.mllib.classification.NaiveBayesModel import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.SparkContext + +import grizzled.slf4j.Logger -case class NaiveBayesAlgorithmParams( +case class AlgorithmParams( lambda: Double ) extends Params // extends P2LAlgorithm because the MLlib's NaiveBayesModel doesn't contain RDD. 
-class NaiveBayesAlgorithm(val ap: NaiveBayesAlgorithmParams) +class NaiveBayesAlgorithm(val ap: AlgorithmParams) extends P2LAlgorithm[PreparedData, NaiveBayesModel, Query, PredictedResult] { + @transient lazy val logger = Logger[this.type] + def train(sc: SparkContext, data: PreparedData): NaiveBayesModel = { + // MLLib NaiveBayes cannot handle empty training data. + require(data.labeledPoints.take(1).nonEmpty, + s"RDD[labeledPoints] in PreparedData cannot be empty." + + " Please check if DataSource generates TrainingData" + + " and Preparator generates PreparedData correctly.") + NaiveBayes.train(data.labeledPoints, ap.lambda) } def predict(model: NaiveBayesModel, query: Query): PredictedResult = { - val label = model.predict(Vectors.dense(query.features)) + val label = model.predict(Vectors.dense( + Array(query.attr0, query.attr1, query.attr2) + )) new PredictedResult(label) } http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/src/main/scala/PrecisionEvaluation.scala ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/src/main/scala/PrecisionEvaluation.scala b/examples/scala-parallel-classification/add-algorithm/src/main/scala/PrecisionEvaluation.scala new file mode 100644 index 0000000..cd91a1e --- /dev/null +++ b/examples/scala-parallel-classification/add-algorithm/src/main/scala/PrecisionEvaluation.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.predictionio.examples.classification + +import org.apache.predictionio.controller.OptionAverageMetric +import org.apache.predictionio.controller.EmptyEvaluationInfo +import org.apache.predictionio.controller.Evaluation + +case class Precision(label: Double) + extends OptionAverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] { + override def header: String = s"Precision(label = $label)" + + def calculate(query: Query, predicted: PredictedResult, actual: ActualResult) + : Option[Double] = { + if (predicted.label == label) { + if (predicted.label == actual.label) { + Some(1.0) // True positive + } else { + Some(0.0) // False positive + } + } else { + None // Unrelated case for calculating precision + } + } +} + +object PrecisionEvaluation extends Evaluation { + engineMetric = (ClassificationEngine(), new Precision(label = 1.0)) +} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/src/main/scala/Preparator.scala ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/src/main/scala/Preparator.scala b/examples/scala-parallel-classification/add-algorithm/src/main/scala/Preparator.scala index 497db19..20d8f8c 100644 --- a/examples/scala-parallel-classification/add-algorithm/src/main/scala/Preparator.scala +++ b/examples/scala-parallel-classification/add-algorithm/src/main/scala/Preparator.scala @@ -15,12 +15,11 @@ * limitations under the License. 
*/ -package org.template.classification +package org.apache.predictionio.examples.classification import org.apache.predictionio.controller.PPreparator import org.apache.spark.SparkContext -import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.apache.spark.mllib.regression.LabeledPoint http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/src/main/scala/RandomForestAlgorithm.scala ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/src/main/scala/RandomForestAlgorithm.scala b/examples/scala-parallel-classification/add-algorithm/src/main/scala/RandomForestAlgorithm.scala index 1caad43..5b472e9 100644 --- a/examples/scala-parallel-classification/add-algorithm/src/main/scala/RandomForestAlgorithm.scala +++ b/examples/scala-parallel-classification/add-algorithm/src/main/scala/RandomForestAlgorithm.scala @@ -15,15 +15,15 @@ * limitations under the License. 
*/ -package org.template.classification +package org.apache.predictionio.examples.classification import org.apache.predictionio.controller.P2LAlgorithm import org.apache.predictionio.controller.Params -import org.apache.spark.SparkContext import org.apache.spark.mllib.tree.RandomForest // CHANGED import org.apache.spark.mllib.tree.model.RandomForestModel // CHANGED import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.SparkContext // CHANGED case class RandomForestAlgorithmParams( @@ -61,7 +61,9 @@ class RandomForestAlgorithm(val ap: RandomForestAlgorithmParams) // CHANGED model: RandomForestModel, // CHANGED query: Query): PredictedResult = { - val label = model.predict(Vectors.dense(query.features)) + val label = model.predict(Vectors.dense( + Array(query.attr0, query.attr1, query.attr2) + )) new PredictedResult(label) } http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/src/main/scala/Serving.scala ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/src/main/scala/Serving.scala b/examples/scala-parallel-classification/add-algorithm/src/main/scala/Serving.scala index 0fcca6a..706dfe2 100644 --- a/examples/scala-parallel-classification/add-algorithm/src/main/scala/Serving.scala +++ b/examples/scala-parallel-classification/add-algorithm/src/main/scala/Serving.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.template.classification +package org.apache.predictionio.examples.classification import org.apache.predictionio.controller.LServing http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/add-algorithm/template.json ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/add-algorithm/template.json b/examples/scala-parallel-classification/add-algorithm/template.json new file mode 100644 index 0000000..d076ec5 --- /dev/null +++ b/examples/scala-parallel-classification/add-algorithm/template.json @@ -0,0 +1 @@ +{"pio": {"version": { "min": "0.10.0-incubating" }}} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/custom-attributes/.gitignore ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/custom-attributes/.gitignore b/examples/scala-parallel-classification/custom-attributes/.gitignore deleted file mode 100644 index edc8287..0000000 --- a/examples/scala-parallel-classification/custom-attributes/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -data/*.txt -manifest.json -pio.log -target/ http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/custom-attributes/README.md ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/custom-attributes/README.md b/examples/scala-parallel-classification/custom-attributes/README.md deleted file mode 100644 index 7c3e727..0000000 --- a/examples/scala-parallel-classification/custom-attributes/README.md +++ /dev/null @@ -1,196 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one or more -contributor license agreements. 
See the NOTICE file distributed with -this work for additional information regarding copyright ownership. -The ASF licenses this file to You under the Apache License, Version 2.0 -(the "License"); you may not use this file except in compliance with -the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. ---> - -# Classification Engine Template with Custom Attributes and Random Forest Algorithm - -This example engine is based on Classification Tempplate version v0.1.1 and is modified to use Random Forest algorithm and demonstrates how to use custom attributes for classification. - -## Classification template - -Please refer to http://predictionio.incubator.apache.org/templates/classification/quickstart/ - -## Development Notes - -### Example Use Case - -predict the service plan (plan) a user will subscribe to based on his 3 properties: Gender, Age and Education - -### Input Data: - -Prepared below input data in the format “Plan, Gender, Age, Education” - -0,Male,30,College -0,Female,29,College -1,Male,30,High School -1,Female,28,High School -2,Male,35,No School -2,Male,40,No School -2,Female,25,No School -0,Male,40,College - -Input data is available in data/sample_random_forest_data.txt - -### Importing Input data - -Updated “import_eventserver.py” withe below changes - -1) added “sample_random_forest_data.txt” as file - ``` - parser.add_argument('--file', default="./data/sample_random_forest_data.txt") - ``` -2) Creating events with features.
- ``` - client.create_event( - event="$set", - entity_type="user", - entity_id=str(count), # use the count num as user ID - properties= { - "gender" : data[1], - "age" : int(data[2]), - "education" : data[3], - "plan" : int(data[0]) - } - ) - ``` -### Changes to engine.json - -In the engine.json, removed “naive” algorithm and added “randomforest” algorithm. -``` -"algorithms": [ - { - "name": "randomforest", - "params": { - "numClasses": 3, - "numTrees": 5, - "featureSubsetStrategy": "auto", - "impurity": "gini", - "maxDepth": 4, - "maxBins": 100 - } - } - ] -``` - -### Changes to DataSource.scala - -1) In the readTraining method, defined below maps - ```scala - val gendersMap = Map("Male" -> 0.0, "Female" -> 1.0) - val educationMap = Map("No School" -> 0.0,"High School" -> 1.0,"College" -> 2.0) - ``` -Then encoded the categorical features values using map. -```scala - LabeledPoint(properties.get[Double]("plan"), - Vectors.dense(Array( - gendersMap(properties.get[String]("gender")), - properties.get[Double]("age"), - educationMap(properties.get[String]("education")) - )) - ) -``` - -2) Added gendersMap and educationMap to the TrainingData class -```scala -class TrainingData( - val labeledPoints: RDD[LabeledPoint], - val gendersMap: Map[String,Double], - val educationMap: Map[String,Double] -) extends Serializable -``` -readTraining returns below: -```scala - new TrainingData(labeledPoints, - gendersMap, - educationMap) - -``` - -### Changes to Engine.scala - -In the Engine.scala, replaced “naive” algorithm with “randomforest” algorithm -```scala - Map("randomforest" -> classOf[RandomForestAlgorithm]), -``` -Updated Query.sclaa to include attributes -```scala -class Query( - val gender: String, - val age: Int, - val education: String -) extends Serializable -``` -### Changes to Preparator.scala - -added attributes to PreparedData -```scala - class PreparedData( - val labeledPoints: RDD[LabeledPoint], - val gendersMap: Map[String,Double], - val educationMap:
Map[String,Double] -) extends Serializable -``` -```scala -new PreparedData(trainingData.labeledPoints,trainingData.gendersMap,trainingData.educationMap) - -``` - - -### Created RandomForestAlgorithm.scala - -created new model class -```scala -class PIORandomForestModel( - val gendersMap: Map[String, Double], - val educationMap: Map[String, Double], - val randomForestModel: RandomForestModel -) extends Serializable -``` -train method returns new model class -```scala - new PIORandomForestModel( - gendersMap = data.gendersMap, - educationMap = data.educationMap, - randomForestModel = m - ) -``` - -Predict method implementation -```scala - def predict( - model: PIORandomForestModel, // CHANGED - query: Query): PredictedResult = { - val gendersMap = model.gendersMap - val educationMap = model.educationMap - val randomForestModel = model.randomForestModel - val label = randomForestModel.predict(Vectors.dense(Array(gendersMap(query.gender),query.age.toDouble,educationMap(query.education)))) - new PredictedResult(label) - } - -``` - - -### Sample Request -``` -curl -H "Content-Type: application/json" -d '{ "gender":"Male", -"age":30, -"education":"College" }' http://localhost:8000/queries.json - -curl -H "Content-Type: application/json" -d '{ "gender":"Female", -"age":35, -"education":"No School" }' http://localhost:8000/queries.json - -``` http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/custom-attributes/build.sbt ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/custom-attributes/build.sbt b/examples/scala-parallel-classification/custom-attributes/build.sbt deleted file mode 100644 index 0f219d1..0000000 --- a/examples/scala-parallel-classification/custom-attributes/build.sbt +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import AssemblyKeys._ - -assemblySettings - -name := "template-scala-parallel-classification" - -organization := "org.apache.predictionio" - -libraryDependencies ++= Seq( - "org.apache.predictionio" %% "core" % "0.8.6" % "provided", - "commons-io" % "commons-io" % "2.4", - "org.apache.spark" %% "spark-core" % "1.2.0" % "provided", - "org.apache.spark" %% "spark-mllib" % "1.2.0" % "provided", - "org.json4s" %% "json4s-native" % "3.2.10") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/custom-attributes/data/import_eventserver.py ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/custom-attributes/data/import_eventserver.py b/examples/scala-parallel-classification/custom-attributes/data/import_eventserver.py deleted file mode 100644 index 1b69daf..0000000 --- a/examples/scala-parallel-classification/custom-attributes/data/import_eventserver.py +++ /dev/null @@ -1,61 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -""" -Import sample data for classification engine -""" - -import predictionio -import argparse - -def import_events(client, file): - f = open(file, 'r') - count = 0 - print "Importing data..." - for line in f: - data = line.rstrip('\r\n').split(",") - client.create_event( - event="$set", - entity_type="user", - entity_id=str(count), # use the count num as user ID - properties= { - "gender" : data[1], - "age" : int(data[2]), - "education" : data[3], - "plan" : int(data[0]) - } - ) - count += 1 - f.close() - print "%s events are imported." 
% count - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description="Import sample data for classification engine") - parser.add_argument('--access_key', default='invald_access_key') - parser.add_argument('--url', default="http://localhost:7070") - parser.add_argument('--file', default="./data/sample_random_forest_data.txt") - - args = parser.parse_args() - print args - - client = predictionio.EventClient( - access_key=args.access_key, - url=args.url, - threads=5, - qsize=500) - import_events(client, args.file) http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/custom-attributes/data/sample_random_forest_data.txt ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/custom-attributes/data/sample_random_forest_data.txt b/examples/scala-parallel-classification/custom-attributes/data/sample_random_forest_data.txt deleted file mode 100644 index 15b146c..0000000 --- a/examples/scala-parallel-classification/custom-attributes/data/sample_random_forest_data.txt +++ /dev/null @@ -1,8 +0,0 @@ -0,Male,30,College -0,Female,29,College -1,Male,30,High School -1,Female,28,High School -2,Male,35,No School -2,Male,40,No School -2,Female,25,No School -0,Male,40,College http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/custom-attributes/engine.json ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/custom-attributes/engine.json b/examples/scala-parallel-classification/custom-attributes/engine.json deleted file mode 100644 index c14801e..0000000 --- a/examples/scala-parallel-classification/custom-attributes/engine.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "id": "default", - "description": "Default settings", - "engineFactory": "com.test1.ClassificationEngine", - "datasource": { - "params": { - 
"appId": 1 - } - }, - "algorithms": [ - { - "name": "randomforest", - "params": { - "numClasses": 3, - "numTrees": 5, - "featureSubsetStrategy": "auto", - "impurity": "gini", - "maxDepth": 4, - "maxBins": 100 - } - } - ] -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/custom-attributes/project/assembly.sbt ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/custom-attributes/project/assembly.sbt b/examples/scala-parallel-classification/custom-attributes/project/assembly.sbt deleted file mode 100644 index 54c3252..0000000 --- a/examples/scala-parallel-classification/custom-attributes/project/assembly.sbt +++ /dev/null @@ -1 +0,0 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/custom-attributes/src/main/scala/DataSource.scala ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/custom-attributes/src/main/scala/DataSource.scala b/examples/scala-parallel-classification/custom-attributes/src/main/scala/DataSource.scala deleted file mode 100644 index 7db3e06..0000000 --- a/examples/scala-parallel-classification/custom-attributes/src/main/scala/DataSource.scala +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.test1 - -import org.apache.predictionio.controller.PDataSource -import org.apache.predictionio.controller.EmptyEvaluationInfo -import org.apache.predictionio.controller.EmptyActualResult -import org.apache.predictionio.controller.Params -import org.apache.predictionio.data.storage.Event -import org.apache.predictionio.data.storage.Storage - -import org.apache.spark.SparkContext -import org.apache.spark.SparkContext._ -import org.apache.spark.rdd.RDD -import org.apache.spark.mllib.regression.LabeledPoint -import org.apache.spark.mllib.linalg.Vectors - -import grizzled.slf4j.Logger - -case class DataSourceParams(appId: Int) extends Params - -class DataSource(val dsp: DataSourceParams) - extends PDataSource[TrainingData, - EmptyEvaluationInfo, Query, EmptyActualResult] { - - @transient lazy val logger = Logger[this.type] - - override - def readTraining(sc: SparkContext): TrainingData = { - val eventsDb = Storage.getPEvents() - val gendersMap = Map("Male" -> 0.0, "Female" -> 1.0) - val educationMap = Map("No School" -> 0.0,"High School" -> 1.0,"College" -> 2.0) - val labeledPoints: RDD[LabeledPoint] = eventsDb.aggregateProperties( - appId = dsp.appId, - entityType = "user", - // only keep entities with these required properties defined - required = Some(List("plan", "gender", "age", "education")))(sc) - // aggregateProperties() returns RDD pair of - // entity ID and its aggregated properties - .map { case (entityId, properties) => - try { - LabeledPoint(properties.get[Double]("plan"), - Vectors.dense(Array( - 
gendersMap(properties.get[String]("gender")), - properties.get[Double]("age"), - educationMap(properties.get[String]("education")) - )) - ) - } catch { - case e: Exception => { - logger.error(s"Failed to get properties ${properties} of" + - s" ${entityId}. Exception: ${e}.") - throw e - } - } - }.cache() - - new TrainingData(labeledPoints, - gendersMap, - educationMap) - } -} - -class TrainingData( - val labeledPoints: RDD[LabeledPoint], - val gendersMap: Map[String,Double], - val educationMap: Map[String,Double] -) extends Serializable http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/custom-attributes/src/main/scala/Engine.scala ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/custom-attributes/src/main/scala/Engine.scala b/examples/scala-parallel-classification/custom-attributes/src/main/scala/Engine.scala deleted file mode 100644 index 053a7c8..0000000 --- a/examples/scala-parallel-classification/custom-attributes/src/main/scala/Engine.scala +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.test1 - -import org.apache.predictionio.controller.IEngineFactory -import org.apache.predictionio.controller.Engine - -class Query( - val gender: String, - val age: Int, - val education: String -) extends Serializable - -class PredictedResult( - val label: Double -) extends Serializable - -object ClassificationEngine extends IEngineFactory { - def apply() = { - new Engine( - classOf[DataSource], - classOf[Preparator], - Map("randomforest" -> classOf[RandomForestAlgorithm]), - classOf[Serving]) - } -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/custom-attributes/src/main/scala/Preparator.scala ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/custom-attributes/src/main/scala/Preparator.scala b/examples/scala-parallel-classification/custom-attributes/src/main/scala/Preparator.scala deleted file mode 100644 index 38745dc..0000000 --- a/examples/scala-parallel-classification/custom-attributes/src/main/scala/Preparator.scala +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.test1 - -import org.apache.predictionio.controller.PPreparator - -import org.apache.spark.SparkContext -import org.apache.spark.SparkContext._ -import org.apache.spark.rdd.RDD -import org.apache.spark.mllib.regression.LabeledPoint - -class PreparedData( - val labeledPoints: RDD[LabeledPoint], - val gendersMap: Map[String,Double], - val educationMap: Map[String,Double] -) extends Serializable - -class Preparator extends PPreparator[TrainingData, PreparedData] { - - def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { - new PreparedData( - trainingData.labeledPoints, - trainingData.gendersMap, - trainingData.educationMap) - } -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/custom-attributes/src/main/scala/RandomForestAlgorithm.scala ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/custom-attributes/src/main/scala/RandomForestAlgorithm.scala b/examples/scala-parallel-classification/custom-attributes/src/main/scala/RandomForestAlgorithm.scala deleted file mode 100644 index 3bac4ec..0000000 --- a/examples/scala-parallel-classification/custom-attributes/src/main/scala/RandomForestAlgorithm.scala +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.test1 - -import org.apache.predictionio.controller.P2LAlgorithm -import org.apache.predictionio.controller.Params - -import org.apache.spark.mllib.tree.RandomForest // CHANGED -import org.apache.spark.mllib.tree.model.RandomForestModel // CHANGED -import org.apache.spark.mllib.linalg.Vectors - -// CHANGED -case class RandomForestAlgorithmParams( - numClasses: Int, - numTrees: Int, - featureSubsetStrategy: String, - impurity: String, - maxDepth: Int, - maxBins: Int -) extends Params - -class PIORandomForestModel( - val gendersMap: Map[String, Double], - val educationMap: Map[String, Double], - val randomForestModel: RandomForestModel -) extends Serializable - -// extends P2LAlgorithm because the MLlib's RandomForestModel doesn't -// contain RDD. -class RandomForestAlgorithm(val ap: RandomForestAlgorithmParams) // CHANGED - extends P2LAlgorithm[PreparedData, PIORandomForestModel, // CHANGED - Query, PredictedResult] { - - def train(data: PreparedData): PIORandomForestModel = { // CHANGED - // CHANGED - // Empty categoricalFeaturesInfo indicates all features are continuous. 
- val categoricalFeaturesInfo = Map[Int, Int]() - val m = RandomForest.trainClassifier( - data.labeledPoints, - ap.numClasses, - categoricalFeaturesInfo, - ap.numTrees, - ap.featureSubsetStrategy, - ap.impurity, - ap.maxDepth, - ap.maxBins) - new PIORandomForestModel( - gendersMap = data.gendersMap, - educationMap = data.educationMap, - randomForestModel = m - ) - } - - def predict( - model: PIORandomForestModel, // CHANGED - query: Query): PredictedResult = { - val gendersMap = model.gendersMap - val educationMap = model.educationMap - val randomForestModel = model.randomForestModel - val label = randomForestModel.predict( - Vectors.dense(Array( - gendersMap(query.gender), - query.age.toDouble, - educationMap(query.education)) - )) - new PredictedResult(label) - } - -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/custom-attributes/src/main/scala/Serving.scala ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/custom-attributes/src/main/scala/Serving.scala b/examples/scala-parallel-classification/custom-attributes/src/main/scala/Serving.scala deleted file mode 100644 index e08def0..0000000 --- a/examples/scala-parallel-classification/custom-attributes/src/main/scala/Serving.scala +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.test1 - -import org.apache.predictionio.controller.LServing - -class Serving extends LServing[Query, PredictedResult] { - - override - def serve(query: Query, - predictedResults: Seq[PredictedResult]): PredictedResult = { - predictedResults.head - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/reading-custom-properties/.gitignore ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/reading-custom-properties/.gitignore b/examples/scala-parallel-classification/reading-custom-properties/.gitignore new file mode 100644 index 0000000..9917afe --- /dev/null +++ b/examples/scala-parallel-classification/reading-custom-properties/.gitignore @@ -0,0 +1,5 @@ +manifest.json +pio.log +/pio.sbt +target/ +.idea http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/reading-custom-properties/build.sbt ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/reading-custom-properties/build.sbt b/examples/scala-parallel-classification/reading-custom-properties/build.sbt new file mode 100644 index 0000000..f5fb250 --- /dev/null +++ b/examples/scala-parallel-classification/reading-custom-properties/build.sbt @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +name := "template-scala-parallel-classification" + +organization := "org.apache.predictionio" +scalaVersion := "2.11.8" +libraryDependencies ++= Seq( + "org.apache.predictionio" %% "apache-predictionio-core" % "0.11.0-incubating" % "provided", + "org.apache.spark" %% "spark-mllib" % "2.1.1" % "provided") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/reading-custom-properties/data/data.txt ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/reading-custom-properties/data/data.txt b/examples/scala-parallel-classification/reading-custom-properties/data/data.txt new file mode 100644 index 0000000..c08ad7c --- /dev/null +++ b/examples/scala-parallel-classification/reading-custom-properties/data/data.txt @@ -0,0 +1,153 @@ +0,51 35 12 +0,49 30 12 +0,47 32 12 +0,46 31 12 +0,50 36 12 +0,54 39 14 +0,46 34 13 +0,50 34 12 +0,44 29 12 +0,49 31 11 +0,54 37 12 +0,48 34 12 +0,48 30 11 +0,43 30 11 +0,58 40 12 +0,57 44 14 +0,54 39 14 +0,51 35 13 +0,57 38 13 +0,51 38 13 +0,54 34 12 +0,51 37 14 +0,46 36 12 +0,51 33 15 +0,48 34 12 +0,50 30 12 +0,50 34 14 +0,52 35 12 +0,52 34 12 +0,47 32 12 +0,48 31 12 +0,54 34 14 +0,52 41 11 +0,55 42 12 +0,49 31 11 +0,50 32 
12 +0,55 35 12 +0,49 31 11 +0,44 30 12 +0,51 34 12 +0,50 35 13 +0,45 23 13 +0,44 32 12 +0,50 35 16 +0,51 38 14 +0,48 30 13 +0,51 38 12 +0,46 32 12 +0,53 37 12 +0,50 33 12 +1,70 32 14 +1,64 32 15 +1,69 31 15 +1,55 23 13 +1,65 28 15 +1,57 28 13 +1,63 33 16 +1,49 24 10 +1,66 29 13 +1,52 27 14 +1,50 20 10 +1,59 30 15 +1,60 22 10 +1,61 29 14 +1,56 29 13 +1,67 31 14 +1,56 30 15 +1,58 27 10 +1,62 22 15 +1,56 25 11 +1,59 32 18 +1,61 28 13 +1,63 25 15 +1,61 28 12 +1,64 29 13 +1,66 30 14 +1,68 28 14 +1,67 30 17 +1,60 29 15 +1,57 26 10 +1,55 24 11 +1,55 24 10 +1,58 27 12 +1,60 27 16 +1,54 30 15 +1,60 34 16 +1,67 31 15 +1,63 23 13 +1,56 30 13 +1,55 25 13 +1,55 26 12 +1,61 30 14 +1,58 26 12 +1,50 23 10 +1,56 27 13 +1,57 30 12 +1,57 29 13 +1,62 29 13 +1,51 25 11 +1,57 28 13 +2,63 33 25 +2,58 27 19 +2,71 30 21 +2,63 29 18 +2,65 30 22 +2,76 30 21 +2,49 25 17 +2,73 29 18 +2,67 25 18 +2,72 36 25 +2,65 32 20 +2,64 27 19 +2,68 30 21 +2,57 25 20 +2,58 28 24 +2,64 32 23 +2,65 30 18 +2,77 38 22 +2,77 26 23 +2,60 22 15 +2,69 32 23 +2,56 28 20 +2,77 28 20 +2,63 27 18 +2,67 33 21 +2,72 32 18 +2,62 28 18 +2,61 30 18 +2,64 28 21 +2,72 30 16 +2,74 28 19 +2,79 38 20 +2,64 28 22 +2,63 28 15 +2,61 26 14 +2,77 30 23 +2,63 34 24 +2,64 31 18 +2,60 30 18 +2,69 31 21 +2,67 31 24 +2,69 31 23 +2,58 27 19 +2,68 32 23 +2,67 33 25 +2,67 30 23 +2,63 25 19 +2,65 30 20 +2,62 34 23 +2,59 30 18 +3,80 10 70 +3,82 20 71 +3,90 15 73 http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/reading-custom-properties/data/import_eventserver.py ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/reading-custom-properties/data/import_eventserver.py b/examples/scala-parallel-classification/reading-custom-properties/data/import_eventserver.py new file mode 100644 index 0000000..573bf25 --- /dev/null +++ 
b/examples/scala-parallel-classification/reading-custom-properties/data/import_eventserver.py @@ -0,0 +1,64 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +Import sample data for classification engine +""" + +import predictionio +import argparse + +def import_events(client, file): + f = open(file, 'r') + count = 0 + print("Importing data...") + for line in f: + data = line.rstrip('\r\n').split(",") + label = data[0] + attr = data[1].split(" ") + client.create_event( + event="$set", + entity_type="item", + entity_id=str(count), + properties= { + "featureA" : int(attr[0]), + "featureB" : int(attr[1]), + "featureC" : int(attr[2]), + "featureD" : 0, + "label" : int(label) + } + ) + count += 1 + f.close() + print("%s events are imported." 
% count) + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="Import sample data for classification engine") + parser.add_argument('--access_key', default='invald_access_key') + parser.add_argument('--url', default="http://localhost:7070") + parser.add_argument('--file', default="./data/data.txt") + + args = parser.parse_args() + print(args) + + client = predictionio.EventClient( + access_key=args.access_key, + url=args.url, + threads=5, + qsize=500) + import_events(client, args.file) http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/reading-custom-properties/engine.json ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/reading-custom-properties/engine.json b/examples/scala-parallel-classification/reading-custom-properties/engine.json new file mode 100644 index 0000000..c64dac0 --- /dev/null +++ b/examples/scala-parallel-classification/reading-custom-properties/engine.json @@ -0,0 +1,18 @@ +{ + "id": "default", + "description": "Default settings", + "engineFactory": "org.apache.predictionio.examples.classification.ClassificationEngine", + "datasource": { + "params": { + "appName": "MyApp1" + } + }, + "algorithms": [ + { + "name": "naive", + "params": { + "lambda": 1.0 + } + } + ] +} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/reading-custom-properties/project/assembly.sbt ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/reading-custom-properties/project/assembly.sbt b/examples/scala-parallel-classification/reading-custom-properties/project/assembly.sbt new file mode 100644 index 0000000..e17409e --- /dev/null +++ b/examples/scala-parallel-classification/reading-custom-properties/project/assembly.sbt @@ -0,0 +1 @@ +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % 
"0.14.4") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/76f34090/examples/scala-parallel-classification/reading-custom-properties/project/build.properties ---------------------------------------------------------------------- diff --git a/examples/scala-parallel-classification/reading-custom-properties/project/build.properties b/examples/scala-parallel-classification/reading-custom-properties/project/build.properties new file mode 100644 index 0000000..cf19fd0 --- /dev/null +++ b/examples/scala-parallel-classification/reading-custom-properties/project/build.properties @@ -0,0 +1 @@ +sbt.version=0.13.15 \ No newline at end of file
