http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/file_random.py ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/file_random.py b/examples/experimental/scala-local-friend-recommendation/file_random.py deleted file mode 100644 index 95621e9..0000000 --- a/examples/experimental/scala-local-friend-recommendation/file_random.py +++ /dev/null @@ -1,207 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import sys -import random - -read_file = open("data/user_profile.txt", 'r') -write_file = open("data/mini_user_profile.txt", 'w') -number_of_lines = int(sys.argv[1]) -number_of_items = int(sys.argv[2]) - -#record number of lines -count = 0 -random_num_list = [] -# loop through the file to get number of lines in the file -for line in read_file: - count += 1 - -print "generating random numbers" -# generating a list of random lines to read from -for i in range(0, number_of_lines): - random_num_list.append(random.randint(0, count)) - -#get rid of any duplicates -no_duplicate_list = list(set(random_num_list)) - -#sort the list -no_duplicate_list.sort() -#print no_duplicate_list - -#go to file begining -read_file.seek(0) -count = 0 -index = 0 -user_id_list = [] -print "getting lines from user_profile" -for line in read_file: - if count == no_duplicate_list[index]: - write_file.write(line) - index += 1 - user_id_list.append(int(line.split()[0])) - if index == len(no_duplicate_list): - break - count += 1 - -#user_id_list is sorted - -user_id_list = map(str, user_id_list) -user_id_list.sort() -#print user_id_list -print "user_id finished" - -print "getting lines from item" -read_file = open("data/item.txt", 'r') -write_file = open("data/mini_item.txt", 'w') -count = 0 -random_num_list = [] -for line in read_file: - count += 1 - -for i in range(0, number_of_items): - random_num_list.append(random.randint(0, count)) - -#no duplicate -random_num_list = list(set(random_num_list)) - -random_num_list.sort() - -read_file.seek(0) -count = 0 -index = 0 -item_id_list = [] -for line in read_file: - if count == random_num_list[index]: - write_file.write(line) - index += 1 - item_id_list.append(int(line.split()[0])) - if index == len(random_num_list): - break - count += 1 -print "item finished" - -print "getting mini user_key_word" -read_file = open("data/user_key_word.txt", 'r') -write_file = open("data/mini_user_key_word.txt", 'w') - -#record number of lines -count = 0 -index = 
0 -# loop through the file to get number of lines in the file -for line in read_file: - if line.split()[0] == user_id_list[index]: - write_file.write(line) - index += 1 - if index == len(user_id_list): - #print "break" - break -print "user keyword finished" -#go to file begining -#getting the user_sns_small - -print "getting user sns" -#print user_id_list -read_file = open("data/user_sns.txt", 'r') - -#write_file = open("data/mini_user_sns_small.txt", 'w') -user_sns_list = [] -index = 0 -met = False -count = 0 -for line in read_file: - count += 1 - #print count - #Same user multiple following - if met: - if line.split()[0] != user_id_list[index]: - index += 1 - met = False - if index == len(user_id_list): - break - if line.split()[0] == user_id_list[index]: - #print "here" - user_sns_list.append(line) - met = True - # if the current line's user is greater than the user list, that means - # the user doesn't follow or are following, then we move to next user - if line.split()[0] > user_id_list[index]: - index += 1 - if index == len(user_id_list): - break - -#print user_sns_list -write_file = open("data/mini_user_sns.txt",'w') -for line in user_sns_list: - for user_id in user_id_list: - if line.split()[1] == user_id: - write_file.write(line) - break -print "sns got" - -print "getting user action" -#for line in write_file: -read_file = open("data/user_action.txt", 'r') -user_action_list = [] -index = 0 -met = False -count = 0 -for line in read_file: - count += 1 - #print count - if met: - if line.split()[0] != user_id_list[index]: - index += 1 - met = False - if index == len(user_id_list): - break - if line.split()[0] == user_id_list[index]: - #print "here" - user_action_list.append(line) - met = True - if line.split()[0] > user_id_list[index]: - index += 1 - if index == len(user_id_list): - break -#print user_action_list -write_file = open("data/mini_user_action.txt",'w') -for line in user_action_list: - for user_id in user_id_list: - if line.split()[1] == user_id: - 
write_file.write(line) - break -print "user action got" - -print "getting rec_log_train" -user_set = set(user_id_list) -item_set = set(item_id_list) -read_file = open("data/rec_log_train.txt", 'r') -write_file = open("data/mini_rec_log_train.txt",'w') -count = 0 -#for item in item_set: -# print type(item) -#for user in user_set: -# print type(user) -for line in read_file: - words = line.split() -# if words[0] in user_set and (words[1] in user_set or words[1] in item_set): - if words[0] in user_set and words[1] in item_set: - write_file.write(line) - print count - count += 1 - -print "Done" -
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/keyword_similarity_engine.json ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/keyword_similarity_engine.json b/examples/experimental/scala-local-friend-recommendation/keyword_similarity_engine.json deleted file mode 100644 index f80fe5d..0000000 --- a/examples/experimental/scala-local-friend-recommendation/keyword_similarity_engine.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "id": "org.apache.predictionio.examples.friendrecommendation.keywordsimilarity", - "version": "0.1", - "name": "Friend Recommendation Engine with Keyword Similarity Method", - "engineFactory": "org.apache.predictionio.examples.friendrecommendation.KeywordSimilarityEngineFactory", - "datasource": { - "itemFilePath": "data/item.txt", - "userKeywordFilePath": "data/user_key_word.txt", - "userActionFilePath": "data/user_action.txt", - "trainingRecordFilePath": "data/rec_log_train.txt" - }, - "algorithms": [ - { - "name": "KeywordSimilarityAlgorithm", - "params": { - } - } - ] -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/project/assembly.sbt ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/project/assembly.sbt b/examples/experimental/scala-local-friend-recommendation/project/assembly.sbt deleted file mode 100644 index 54c3252..0000000 --- a/examples/experimental/scala-local-friend-recommendation/project/assembly.sbt +++ /dev/null @@ -1 +0,0 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/random_engine.json 
---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/random_engine.json b/examples/experimental/scala-local-friend-recommendation/random_engine.json deleted file mode 100644 index 5aba3c3..0000000 --- a/examples/experimental/scala-local-friend-recommendation/random_engine.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "id": "org.apache.predictionio.examples.friendrecommendation.random", - "version": "0.1", - "name": "Friend Recommendation Engine with Random Method", - "engineFactory": "org.apache.predictionio.examples.friendrecommendation.RandomEngineFactory", - "datasource": { - "itemFilePath": "data/item.txt", - "userKeywordFilePath": "data/user_key_word.txt", - "userActionFilePath": "data/user_action.txt", - "trainingRecordFilePath": "data/rec_log_train.txt" - }, - "algorithms": [ - { - "name": "RandomAlgorithm", - "params": { - } - } - ] -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationAlgoParams.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationAlgoParams.scala b/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationAlgoParams.scala deleted file mode 100644 index 596a3fa..0000000 --- a/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationAlgoParams.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.predictionio.examples.friendrecommendation - -import org.apache.predictionio.controller._ - -class FriendRecommendationAlgoParams ( -) extends Params http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationDataSource.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationDataSource.scala b/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationDataSource.scala deleted file mode 100644 index 399e31d..0000000 --- a/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationDataSource.scala +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.predictionio.examples.friendrecommendation - -import org.apache.predictionio.controller._ -import scala.io.Source -import scala.collection.immutable.HashMap - -class FriendRecommendationDataSource ( - val dsp: FriendRecommendationDataSourceParams -) extends LDataSource[FriendRecommendationTrainingData, - EmptyEvaluationInfo, FriendRecommendationQuery, EmptyActualResult] { - - override - def readTraining() : FriendRecommendationTrainingData = { - val (itemIdMap, itemKeyword) = readItem(dsp.itemFilePath) - val (userIdMap, userKeyword) = readUser(dsp.userKeywordFilePath) - val adjArray = readRelationship(dsp.userActionFilePath, - userKeyword.size, userIdMap) - // Originally for the purpose of training an acceptance threshold - // Commented out here due to the high time and space complexity of training - // val trainingRecord = readTrainingRecord(dsp.trainingRecordFilePath, - // userIdMap, itemIdMap) - val trainingRecord = null - new FriendRecommendationTrainingData(userIdMap, - itemIdMap, userKeyword, itemKeyword, adjArray, trainingRecord) - } - - def readItem(file: String) : - (HashMap[Int, Int], Array[HashMap[Int, Double]]) = { - val itemSize = Source.fromFile(file).getLines().size - val lines = Source.fromFile(file).getLines() - // An array on Map[keywordId -> weight] values with internal item id index - val itemKeyword = new Array[HashMap[Int, Double]](itemSize) - // A map from external id to internal id - var itemIdMap = new HashMap[Int, Int]() - var internalId = 0 - lines.foreach{ - line => - val data = line.split("\\s") - 
itemIdMap += (data(0).toInt -> internalId) - var keywordMap = new HashMap[Int, Double]() - data(2).split(";").foreach{ - term => - keywordMap += (term.toInt -> 1.0) - } - itemKeyword(internalId) = keywordMap - internalId += 1 - } - (itemIdMap, itemKeyword) - } - - def readUser(file: String) : - (HashMap[Int, Int], Array[HashMap[Int, Double]]) = { - val userSize = Source.fromFile(file).getLines().size - val lines = Source.fromFile(file).getLines() - // An array on Map[keywordId -> weight] values with internal item id index - val userKeyword = new Array[HashMap[Int, Double]](userSize) - // A map from external id to internal id - var userIdMap = new HashMap[Int, Int]() - var internalId = 0 - lines.foreach{ - line => - val data = line.split("\\s") - userIdMap += (data(0).toInt -> internalId) - var keywordMap = new HashMap[Int, Double]() - data(1).split(";").foreach{ - termWeight => - val termWeightPair = termWeight.split(":") - keywordMap += (termWeightPair(0).toInt -> termWeightPair(1).toDouble) - } - userKeyword(internalId) = keywordMap - internalId += 1 - } - (userIdMap, userKeyword) - } - - def readRelationship(file: String, - userSize: Int, userIdMap: HashMap[Int, Int]) : - Array[List[(Int, Int)]] = { - val adjArray = new Array[List[(Int, Int)]](userSize) - val lines = Source.fromFile(file).getLines() - lines.foreach{ - line => - val data = line.split("\\s").map(s => s.toInt) - if (userIdMap.contains(data(0)) && userIdMap.contains(data(1))) { - val srcInternalId = userIdMap(data(0)) - val destInternalId = userIdMap(data(1)) - if (adjArray(srcInternalId) == null) { - adjArray(srcInternalId) = (destInternalId, data.slice(2,5).sum):: - List() - } else { - adjArray(srcInternalId) = (destInternalId, data.slice(2,5).sum):: - adjArray(srcInternalId) - } - } - } - adjArray - } - - def readTrainingRecord(file: String, - userIdMap: HashMap[Int, Int], itemIdMap: HashMap[Int, Int]) : - Stream[(Int, Int, Boolean)] = { - val lines = Source.fromFile(file).getLines() - var 
trainingRecord: Stream[(Int, Int, Boolean)] = Stream() - lines.foreach{ - line => - val data = line.split("\\s") - val userId = userIdMap(data(0).toInt) - val itemId = itemIdMap(data(1).toInt) - val result = (data(2).toInt == 1) - trainingRecord = (userId, itemId, result) #:: trainingRecord - } - trainingRecord - } -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationDataSourceParams.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationDataSourceParams.scala b/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationDataSourceParams.scala deleted file mode 100644 index bd8f6b0..0000000 --- a/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationDataSourceParams.scala +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.predictionio.examples.friendrecommendation - -import org.apache.predictionio.controller._ - -class FriendRecommendationDataSourceParams( - val itemFilePath: String, - val userKeywordFilePath: String, - val userActionFilePath: String, - val trainingRecordFilePath: String -) extends Params http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationPrediction.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationPrediction.scala b/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationPrediction.scala deleted file mode 100644 index c7e2b8b..0000000 --- a/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationPrediction.scala +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.predictionio.examples.friendrecommendation - -class FriendRecommendationPrediction ( - val confidence: Double, - // returning boolean acceptance to align with KDD 2012 scenario - val acceptance: Boolean -) extends Serializable http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationQuery.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationQuery.scala b/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationQuery.scala deleted file mode 100644 index ca2f912..0000000 --- a/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationQuery.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.predictionio.examples.friendrecommendation - -class FriendRecommendationQuery ( - // To align with the KDD 2012 scenario - // Given a user and an item, predict acceptance - val user: Int, - val item: Int -) extends Serializable http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationTrainingData.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationTrainingData.scala b/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationTrainingData.scala deleted file mode 100644 index 2a0596e..0000000 --- a/examples/experimental/scala-local-friend-recommendation/src/main/scala/FriendRecommendationTrainingData.scala +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.predictionio.examples.friendrecommendation - -import scala.collection.immutable.HashMap - -class FriendRecommendationTrainingData ( - // Designed to fit random, keyword similarity methods and simrank now - // Will be updated to to fit more advanced algorithms when they are developed - // External-internal id map - val userIdMap: HashMap[Int, Int], - val itemIdMap: HashMap[Int, Int], - // Keyword array, internal id index, term-weight map item - val userKeyword: Array[HashMap[Int, Double]], - val itemKeyword: Array[HashMap[Int, Double]], - // User relationship array, - // src internal id index, dest-internal-id-weight list item - val socialAction: Array[List[(Int, Int)]], - // Training record for training purpose - val trainingRecord: Stream[(Int, Int, Boolean)] -) extends Serializable http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/src/main/scala/KeywordSimilarityAlgorithm.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/src/main/scala/KeywordSimilarityAlgorithm.scala b/examples/experimental/scala-local-friend-recommendation/src/main/scala/KeywordSimilarityAlgorithm.scala deleted file mode 100644 index 115fa85..0000000 --- a/examples/experimental/scala-local-friend-recommendation/src/main/scala/KeywordSimilarityAlgorithm.scala +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.predictionio.examples.friendrecommendation - -import org.apache.predictionio.controller._ -import scala.collection.immutable.HashMap -import scala.math -import scala.io.Source - -class KeywordSimilarityAlgorithm (val ap: FriendRecommendationAlgoParams) - extends LAlgorithm[FriendRecommendationTrainingData, - KeywordSimilarityModel, FriendRecommendationQuery, - FriendRecommendationPrediction] { - - override - def train(td: FriendRecommendationTrainingData): KeywordSimilarityModel = { - var keywordSimWeight = 1.0 - var keywordSimThreshold = 1.0 - // Originally for the purpose of training an acceptance threshold - // Commented out here due to the high time and space complexity of training - /* - td.trainingRecord.foreach{ - record => - val sim = findKeywordSimilarity(td.userKeyword(record._1), - td.itemKeyword(record._2)) - val prediction = (keywordSimWeight * sim - keywordSimThreshold >= 0) - if (prediction != record._3) { - val y = if (record._3) 1 else -1 - keywordSimWeight += y * sim - keywordSimThreshold += y * -1 - } - } - */ - new KeywordSimilarityModel(td.userIdMap, - td.itemIdMap, td.userKeyword, td.itemKeyword, - keywordSimWeight, keywordSimThreshold) - } - - def findKeywordSimilarity(keywordMap1: HashMap[Int, Double], - keywordMap2: HashMap[Int, Double]): - Double = { - var similarity = 0.0 - keywordMap1.foreach(kw => - similarity += kw._2 * keywordMap2.getOrElse(kw._1, 0.0)) - similarity - } - - override - def predict(model: KeywordSimilarityModel, - query: FriendRecommendationQuery): - FriendRecommendationPrediction 
= { - // Currently use empty map for unseen users or items - if (model.userIdMap.contains(query.user) && - model.itemIdMap.contains(query.item)) { - val confidence = findKeywordSimilarity( - model.userKeyword(model.userIdMap(query.user)), - model.itemKeyword(model.itemIdMap(query.item))) - val acceptance = ((confidence * model.keywordSimWeight) - >= model.keywordSimThreshold) - new FriendRecommendationPrediction(confidence, acceptance) - } else { - val confidence = 0 - val acceptance = ((confidence * model.keywordSimWeight) - >= model.keywordSimThreshold) - new FriendRecommendationPrediction(confidence, acceptance) - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/src/main/scala/KeywordSimilarityEngineFactory.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/src/main/scala/KeywordSimilarityEngineFactory.scala b/examples/experimental/scala-local-friend-recommendation/src/main/scala/KeywordSimilarityEngineFactory.scala deleted file mode 100644 index 51023a4..0000000 --- a/examples/experimental/scala-local-friend-recommendation/src/main/scala/KeywordSimilarityEngineFactory.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.predictionio.examples.friendrecommendation - -import org.apache.predictionio.controller._ - -object KeywordSimilarityEngineFactory extends IEngineFactory { - override - def apply() = { - new Engine( - classOf[FriendRecommendationDataSource], - classOf[LIdentityPreparator[FriendRecommendationTrainingData]], - Map("KeywordSimilarityAlgorithm" -> classOf[KeywordSimilarityAlgorithm]), - classOf[LFirstServing[FriendRecommendationQuery, - FriendRecommendationPrediction]] - ) - } -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/src/main/scala/KeywordSimilarityModel.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/src/main/scala/KeywordSimilarityModel.scala b/examples/experimental/scala-local-friend-recommendation/src/main/scala/KeywordSimilarityModel.scala deleted file mode 100644 index 6de7ee3..0000000 --- a/examples/experimental/scala-local-friend-recommendation/src/main/scala/KeywordSimilarityModel.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.predictionio.examples.friendrecommendation - -import scala.collection.immutable.HashMap - -class KeywordSimilarityModel ( - // External-internal id map - val userIdMap: HashMap[Int, Int], - val itemIdMap: HashMap[Int, Int], - // Keyword array, internal id index, term-weight map item - val userKeyword: Array[HashMap[Int, Double]], - val itemKeyword: Array[HashMap[Int, Double]], - // Weight and threshold trained - val keywordSimWeight: Double, - val keywordSimThreshold: Double -) extends Serializable http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/src/main/scala/RandomAlgorithm.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/src/main/scala/RandomAlgorithm.scala b/examples/experimental/scala-local-friend-recommendation/src/main/scala/RandomAlgorithm.scala deleted file mode 100644 index a71913e..0000000 --- a/examples/experimental/scala-local-friend-recommendation/src/main/scala/RandomAlgorithm.scala +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.predictionio.examples.friendrecommendation - -import org.apache.predictionio.controller._ - -// For random algorithm -import scala.util.Random - -class RandomAlgorithm (val ap: FriendRecommendationAlgoParams) - extends LAlgorithm[FriendRecommendationTrainingData, - RandomModel, FriendRecommendationQuery, FriendRecommendationPrediction] { - - override - def train(pd: FriendRecommendationTrainingData): RandomModel = { - new RandomModel(0.5) - } - - override - def predict(model: RandomModel, query: FriendRecommendationQuery): - FriendRecommendationPrediction = { - val randomConfidence = Random.nextDouble - val acceptance = randomConfidence >= model.randomThreshold - new FriendRecommendationPrediction(randomConfidence, acceptance) - } -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/src/main/scala/RandomEngineFactory.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/src/main/scala/RandomEngineFactory.scala b/examples/experimental/scala-local-friend-recommendation/src/main/scala/RandomEngineFactory.scala deleted file mode 100644 index e650199..0000000 --- a/examples/experimental/scala-local-friend-recommendation/src/main/scala/RandomEngineFactory.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.predictionio.examples.friendrecommendation - -import org.apache.predictionio.controller._ - -object RandomEngineFactory extends IEngineFactory { - override - def apply() = { - new Engine( - classOf[FriendRecommendationDataSource], - classOf[LIdentityPreparator[FriendRecommendationTrainingData]], - Map("RandomAlgorithm" -> classOf[RandomAlgorithm]), - classOf[LFirstServing[FriendRecommendationQuery, - FriendRecommendationPrediction]] - ) - } -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-friend-recommendation/src/main/scala/RandomModel.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-friend-recommendation/src/main/scala/RandomModel.scala b/examples/experimental/scala-local-friend-recommendation/src/main/scala/RandomModel.scala deleted file mode 100644 index 91a8866..0000000 --- a/examples/experimental/scala-local-friend-recommendation/src/main/scala/RandomModel.scala +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.predictionio.examples.friendrecommendation - -class RandomModel( - val randomThreshold: Double -) extends Serializable http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-helloworld/HelloWorld.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-helloworld/HelloWorld.scala b/examples/experimental/scala-local-helloworld/HelloWorld.scala deleted file mode 100644 index 6ec4f0a..0000000 --- a/examples/experimental/scala-local-helloworld/HelloWorld.scala +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.sample.helloworld - -import org.apache.predictionio.controller._ - -import scala.io.Source -import scala.collection.immutable.HashMap - -// all data need to be serializable -class MyTrainingData( - // list of (day, temperature) tuples - val temperatures: List[(String, Double)] -) extends Serializable - -class MyQuery( - val day: String -) extends Serializable - -class MyModel( - val temperatures: HashMap[String, Double] -) extends Serializable { - override def toString = temperatures.toString -} - -class MyPredictedResult( - val temperature: Double -) extends Serializable - -case class MyDataSourceParams(val multiplier: Int - ) extends Params - -class MyDataSource extends LDataSource[ - MyTrainingData, - EmptyEvaluationInfo, - MyQuery, - EmptyActualResult] { - - /* override this to return Training Data only */ - - override - def readTraining(): MyTrainingData = { - val lines = Source.fromFile("../data/helloworld/data.csv").getLines() - .toList.map{ line => - val data = line.split(",") - (data(0), data(1).toDouble) - } - - new MyTrainingData(lines) - } -} - -class MyAlgorithm extends LAlgorithm[ - MyTrainingData, - MyModel, - MyQuery, - MyPredictedResult] { - - - override - def train(pd: MyTrainingData): MyModel = { - // calculate average value of each day - val average = pd.temperatures - .groupBy(_._1) // group by day - .mapValues{ list => - val tempList = list.map(_._2) // get the temperature - tempList.sum / tempList.size - } - - // trait Map is not serializable, use concrete class HashMap - new MyModel(HashMap[String, Double]() ++ average) - } - - override - def predict(model: MyModel, query: MyQuery): MyPredictedResult = { - val temp = model.temperatures(query.day) - new MyPredictedResult(temp) - } -} - -// factory -object MyEngineFactory extends IEngineFactory { - override - def apply() = { - /* SimpleEngine only requires one 
DataSouce and one Algorithm */ - new SimpleEngine( - classOf[MyDataSource], - classOf[MyAlgorithm] - ) - } -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-helloworld/README.md ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-helloworld/README.md b/examples/experimental/scala-local-helloworld/README.md deleted file mode 100644 index 4a8310b..0000000 --- a/examples/experimental/scala-local-helloworld/README.md +++ /dev/null @@ -1,92 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one or more -contributor license agreements. See the NOTICE file distributed with -this work for additional information regarding copyright ownership. -The ASF licenses this file to You under the Apache License, Version 2.0 -(the "License"); you may not use this file except in compliance with -the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. ---> - -# My First "Hello World" Engine - -Prepare training data: -``` -$ cd $PIO_HOME/examples/scala-local-helloworld -$ cp ../data/helloworld/data1.csv ../data/helloworld/data.csv -``` - -Build engine: - -``` -$ ../../bin/pio build -``` - -Train: - -``` -$ ../../bin/pio train -``` - -Example output: - -``` -2014-08-05 17:06:02,638 INFO APIDebugWorkflow$ - Metrics is null. 
Stop here -2014-08-05 17:06:02,769 INFO APIDebugWorkflow$ - Run information saved with ID: 201408050005 -``` - -Deploy: - -``` -$ ../../bin/pio deploy -``` - -Retrieve prediction: - -``` -$ curl -H "Content-Type: application/json" -d '{ "day": "Mon" }' http://localhost:8000/queries.json -``` - -Output: - -``` -{"temperature":75.5} -``` - -Retrieve prediction: - -``` -$ curl -H "Content-Type: application/json" -d '{ "day": "Tue" }' http://localhost:8000/queries.json -``` - -Output: -``` -{"temperature":80.5} -``` - -## 4. Re-training - -Re-train with new data: - -``` -$ cd $PIO_HOME/examples/scala-local-helloworld -$ cp ../data/helloworld/data2.csv ../data/helloworld/data.csv -``` - -``` -$ ../../bin/pio train -$ ../../bin/pio deploy -``` - -``` -$ curl -H "Content-Type: application/json" -d '{ "day": "Mon" }' http://localhost:8000/queries.json - -{"temperature":76.66666666666667} -``` http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-helloworld/build.sbt ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-helloworld/build.sbt b/examples/experimental/scala-local-helloworld/build.sbt deleted file mode 100644 index e14c05b..0000000 --- a/examples/experimental/scala-local-helloworld/build.sbt +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import AssemblyKeys._ - -assemblySettings - -name := "example-scala-local-helloworld" - -organization := "org.sample" - -libraryDependencies ++= Seq( - "org.slf4j" % "slf4j-api" % "1.6.1", - "org.apache.predictionio" %% "apache-predictionio-data" % "0.10.0-incubating", - "org.apache.predictionio" %% "apache-predictionio-core" % "0.10.0-incubating", - "org.apache.spark" %% "spark-core" % "1.2.0" % "provided") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-helloworld/engine.json ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-helloworld/engine.json b/examples/experimental/scala-local-helloworld/engine.json deleted file mode 100644 index 6dd2f74..0000000 --- a/examples/experimental/scala-local-helloworld/engine.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "id": "default", - "description": "My Hello World Engine", - "engineFactory": "org.sample.helloworld.MyEngineFactory" -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-helloworld/project/assembly.sbt ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-helloworld/project/assembly.sbt b/examples/experimental/scala-local-helloworld/project/assembly.sbt deleted file mode 100644 index 54c3252..0000000 --- a/examples/experimental/scala-local-helloworld/project/assembly.sbt +++ /dev/null @@ -1 +0,0 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % 
"0.11.2") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-movielens-evaluation/build.sbt ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-movielens-evaluation/build.sbt b/examples/experimental/scala-local-movielens-evaluation/build.sbt deleted file mode 100644 index ea04235..0000000 --- a/examples/experimental/scala-local-movielens-evaluation/build.sbt +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import AssemblyKeys._ - -assemblySettings - -name := "scala-local-movielens-evaluation" - -organization := "myorg" - -version := "0.0.1-SNAPSHOT" - -libraryDependencies ++= Seq( - "org.apache.predictionio" %% "core" % "0.9.1" % "provided", - "org.apache.predictionio" %% "engines" % "0.9.1" % "provided", - "org.apache.spark" %% "spark-core" % "1.2.0" % "provided") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-movielens-evaluation/engine.json ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-movielens-evaluation/engine.json b/examples/experimental/scala-local-movielens-evaluation/engine.json deleted file mode 100644 index e11e5de..0000000 --- a/examples/experimental/scala-local-movielens-evaluation/engine.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "id": "scala-local-movielens-evaluation", - "description": "scala-local-movielens-evaluation", - "engineFactory": "myorg.MyEngineFactory" -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-movielens-evaluation/project/assembly.sbt ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-movielens-evaluation/project/assembly.sbt b/examples/experimental/scala-local-movielens-evaluation/project/assembly.sbt deleted file mode 100644 index 54c3252..0000000 --- a/examples/experimental/scala-local-movielens-evaluation/project/assembly.sbt +++ /dev/null @@ -1 +0,0 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-movielens-evaluation/src/main/scala/Evaluation.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-movielens-evaluation/src/main/scala/Evaluation.scala 
b/examples/experimental/scala-local-movielens-evaluation/src/main/scala/Evaluation.scala deleted file mode 100644 index d972fbe..0000000 --- a/examples/experimental/scala-local-movielens-evaluation/src/main/scala/Evaluation.scala +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.predictionio.examples.mlc - -import org.apache.predictionio.engines.itemrank.PreparatorParams -import org.apache.predictionio.engines.itemrank.EventsDataSourceParams -import org.apache.predictionio.engines.itemrank.ItemRankEngine -import org.apache.predictionio.engines.itemrank.ItemRankDetailedEvaluator -import org.apache.predictionio.engines.itemrank.DetailedEvaluatorParams -import org.apache.predictionio.engines.itemrank.MeasureType -import org.apache.predictionio.engines.itemrank.mahout.ItemBasedAlgoParams -import org.apache.predictionio.engines.base.AttributeNames -import org.apache.predictionio.engines.base.EventsSlidingEvalParams -import org.apache.predictionio.engines.base.BinaryRatingParams -import org.apache.predictionio.controller.WorkflowParams -import org.apache.predictionio.controller.Workflow -import org.apache.predictionio.controller.EngineParams - -import com.github.nscala_time.time.Imports._ - -object CommonParams { - val DataSourceAttributeNames = AttributeNames( - user = "pio_user", - item = "pio_item", - u2iActions = Set("rate"), - itypes = "pio_itypes", - starttime = "pio_starttime", - endtime = "pio_endtime", - inactive = "pio_inactive", - rating = "pio_rating") - - val PreparatorParams = new PreparatorParams( - actions = Map("rate" -> None), - conflict = "latest") - - val MahoutAlgoParams0 = new ItemBasedAlgoParams( - booleanData = true, - itemSimilarity = "LogLikelihoodSimilarity", - weighted = false, - nearestN = 10, - threshold = 4.9E-324, - numSimilarItems = 50, - numUserActions = 50, - freshness = 0, - freshnessTimeUnit = 86400, - recommendationTime = Some(DateTime.now.millis)) - - val CompleteDataSourceParams = EventsDataSourceParams( - appId = 9, - actions = Set("rate"), - attributeNames = CommonParams.DataSourceAttributeNames, - slidingEval = Some(new EventsSlidingEvalParams( - firstTrainingUntilTime = new DateTime(1998, 2, 1, 0, 0), - evalDuration = Duration.standardDays(7), - evalCount = 12))) -} - -object 
Evaluation1 { - def main(args: Array[String]) { - // Engine Settings - val engine = ItemRankEngine() - - val dsp = EventsDataSourceParams( - appId = 9, - actions = Set("rate"), - attributeNames = CommonParams.DataSourceAttributeNames, - slidingEval = Some(new EventsSlidingEvalParams( - firstTrainingUntilTime = new DateTime(1998, 2, 1, 0, 0), - evalDuration = Duration.standardDays(7), - evalCount = 3)) - ) - - val engineParams = new EngineParams( - dataSourceParams = dsp, - preparatorParams = CommonParams.PreparatorParams, - algorithmParamsList = Seq( - ("mahoutItemBased", CommonParams.MahoutAlgoParams0)) - ) - - // Evaluator Setting - val evaluatorParams = new DetailedEvaluatorParams( - ratingParams = new BinaryRatingParams( - actionsMap = Map("rate" -> None), - goodThreshold = 3), - measureType = MeasureType.PrecisionAtK, - measureK = 10 - ) - - // Run - Workflow.runEngine( - params = WorkflowParams(batch = "MLC: Evaluation1"), - engine = engine, - engineParams = engineParams, - evaluatorClassOpt = Some(classOf[ItemRankDetailedEvaluator]), - evaluatorParams = evaluatorParams - ) - } -} - -object Evaluation2 { - def main(args: Array[String]) { - // Engine Settings - val engine = ItemRankEngine() - - val engineParams = new EngineParams( - dataSourceParams = CommonParams.CompleteDataSourceParams, - preparatorParams = CommonParams.PreparatorParams, - algorithmParamsList = Seq( - ("mahoutItemBased", CommonParams.MahoutAlgoParams0)) - ) - - // Evaluator Setting - val evaluatorParams = new DetailedEvaluatorParams( - ratingParams = new BinaryRatingParams( - actionsMap = Map("rate" -> None), - goodThreshold = 3), - measureType = MeasureType.PrecisionAtK, - measureK = 10 - ) - - // Run - Workflow.runEngine( - params = WorkflowParams(batch = "MLC: Evaluation2"), - engine = engine, - engineParams = engineParams, - evaluatorClassOpt = Some(classOf[ItemRankDetailedEvaluator]), - evaluatorParams = evaluatorParams - ) - } -} 
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-movielens-evaluation/src/main/scala/ItemRecEvaluation.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-movielens-evaluation/src/main/scala/ItemRecEvaluation.scala b/examples/experimental/scala-local-movielens-evaluation/src/main/scala/ItemRecEvaluation.scala deleted file mode 100644 index a26f2de..0000000 --- a/examples/experimental/scala-local-movielens-evaluation/src/main/scala/ItemRecEvaluation.scala +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.predictionio.examples.mlc - -import org.apache.predictionio.engines.itemrec.ItemRecEngine -import org.apache.predictionio.engines.itemrec.EventsDataSourceParams -import org.apache.predictionio.engines.itemrec.PreparatorParams -import org.apache.predictionio.engines.itemrec.NCItemBasedAlgorithmParams -import org.apache.predictionio.engines.itemrec.EvalParams -import org.apache.predictionio.engines.itemrec.ItemRecEvaluator -import org.apache.predictionio.engines.itemrec.ItemRecEvaluatorParams -import org.apache.predictionio.engines.itemrec.MeasureType -import org.apache.predictionio.engines.base.EventsSlidingEvalParams -import org.apache.predictionio.engines.base.BinaryRatingParams - -import org.apache.predictionio.controller.EngineParams -import org.apache.predictionio.controller.Workflow -import org.apache.predictionio.controller.WorkflowParams - -import com.github.nscala_time.time.Imports._ - -// Recommend to run with "--driver-memory 2G" -object ItemRecEvaluation1 { - def main(args: Array[String]) { - val engine = ItemRecEngine() - - val dsp = EventsDataSourceParams( - appId = 9, - actions = Set("rate"), - attributeNames = CommonParams.DataSourceAttributeNames, - slidingEval = Some(new EventsSlidingEvalParams( - firstTrainingUntilTime = new DateTime(1998, 2, 1, 0, 0), - evalDuration = Duration.standardDays(7), - evalCount = 12)), - //evalCount = 3)), - evalParams = Some(new EvalParams(queryN = 10)) - ) - - val pp = new PreparatorParams( - actions = Map("rate" -> None), - seenActions = Set("rate"), - conflict = "latest") - - val ncMahoutAlgoParams = new NCItemBasedAlgorithmParams( - booleanData = true, - itemSimilarity = "LogLikelihoodSimilarity", - weighted = false, - threshold = 4.9E-324, - nearestN = 10, - unseenOnly = false, - freshness = 0, - freshnessTimeUnit = 86400) - - val engineParams = new EngineParams( - dataSourceParams = dsp, - preparatorParams = pp, - algorithmParamsList = Seq( - ("ncMahoutItemBased", ncMahoutAlgoParams))) 
- - val evaluatorParams = new ItemRecEvaluatorParams( - ratingParams = new BinaryRatingParams( - actionsMap = Map("rate" -> None), - goodThreshold = 3), - measureType = MeasureType.PrecisionAtK, - measureK = 10 - ) - - Workflow.runEngine( - params = WorkflowParams(batch = "MLC: ItemRec Evaluation1", verbose = 0), - engine = engine, - engineParams = engineParams, - evaluatorClassOpt = Some(classOf[ItemRecEvaluator]), - evaluatorParams = evaluatorParams - ) - } -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-movielens-filtering/blacklisted.txt ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-movielens-filtering/blacklisted.txt b/examples/experimental/scala-local-movielens-filtering/blacklisted.txt deleted file mode 100644 index 2d959ca..0000000 --- a/examples/experimental/scala-local-movielens-filtering/blacklisted.txt +++ /dev/null @@ -1,2 +0,0 @@ -272 -123 http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-movielens-filtering/build.sbt ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-movielens-filtering/build.sbt b/examples/experimental/scala-local-movielens-filtering/build.sbt deleted file mode 100644 index 699c90c..0000000 --- a/examples/experimental/scala-local-movielens-filtering/build.sbt +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import AssemblyKeys._ - -assemblySettings - -name := "scala-local-movielens-filtering" - -organization := "myorg" - -version := "0.0.1-SNAPSHOT" - -libraryDependencies ++= Seq( - "org.apache.predictionio" %% "core" % "0.9.1" % "provided", - "org.apache.predictionio" %% "engines" % "0.9.1" % "provided", - "org.apache.spark" %% "spark-core" % "1.2.0" % "provided") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-movielens-filtering/engine.json ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-movielens-filtering/engine.json b/examples/experimental/scala-local-movielens-filtering/engine.json deleted file mode 100644 index 4938c15..0000000 --- a/examples/experimental/scala-local-movielens-filtering/engine.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "id": "scala-local-movielens-filtering", - "description": "scala-local-movielens-filtering", - "engineFactory": "myorg.TempFilterEngine", - "datasource": { - "params": { - "appId": YOUR_APP_ID, - "actions": [ - "view", - "like", - "dislike", - "conversion", - "rate" - ], - "attributeNames": { - "user" : "pio_user", - "item" : "pio_item", - "u2iActions": [ - "view", - "like", - "dislike", - "conversion", - "rate" - ], - "itypes" : "pio_itypes", - "starttime" : "pio_starttime", - "endtime" : "pio_endtime", - "inactive" : "pio_inactive", - "rating" : "pio_rating" - } - } - }, - "preparator": { - "params": { - "actions": { - "view": 3, - "like": 5, - "dislike": 1, - "conversion": 4, - 
"rate": null - }, - "conflict": "latest" - } - }, - "algorithms": [ - { - "name": "ncMahoutItemBased", - "params": { - "booleanData": true, - "itemSimilarity": "LogLikelihoodSimilarity", - "weighted": false, - "threshold": 4.9E-324, - "nearestN": 10, - "unseenOnly": false, - "freshness" : 0, - "freshnessTimeUnit" : 86400 - } - } - ], - "serving": { - "params": { - "filepath": FULL_PATH_TO_BLACKLISTED_FILE - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-movielens-filtering/project/assembly.sbt ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-movielens-filtering/project/assembly.sbt b/examples/experimental/scala-local-movielens-filtering/project/assembly.sbt deleted file mode 100644 index 54c3252..0000000 --- a/examples/experimental/scala-local-movielens-filtering/project/assembly.sbt +++ /dev/null @@ -1 +0,0 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-movielens-filtering/src/main/scala/Engine.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-movielens-filtering/src/main/scala/Engine.scala b/examples/experimental/scala-local-movielens-filtering/src/main/scala/Engine.scala deleted file mode 100644 index 1710661..0000000 --- a/examples/experimental/scala-local-movielens-filtering/src/main/scala/Engine.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package myorg - -import org.apache.predictionio.controller.Engine -import org.apache.predictionio.controller.IEngineFactory -import org.apache.predictionio.engines.itemrec.EventsDataSource -import org.apache.predictionio.engines.itemrec.ItemRecPreparator -import org.apache.predictionio.engines.itemrec.NCItemBasedAlgorithm - -object TempFilterEngine extends IEngineFactory { - def apply() = { - new Engine( - classOf[EventsDataSource], - classOf[ItemRecPreparator], - Map("ncMahoutItemBased" -> classOf[NCItemBasedAlgorithm]), - classOf[TempFilter] - ) - } -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-movielens-filtering/src/main/scala/Filtering.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-movielens-filtering/src/main/scala/Filtering.scala b/examples/experimental/scala-local-movielens-filtering/src/main/scala/Filtering.scala deleted file mode 100644 index 83f5866..0000000 --- a/examples/experimental/scala-local-movielens-filtering/src/main/scala/Filtering.scala +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package myorg - -import org.apache.predictionio.controller.LServing -import org.apache.predictionio.controller.Params -import org.apache.predictionio.engines.itemrec.Prediction -import org.apache.predictionio.engines.itemrec.Query -import scala.io.Source - -case class TempFilterParams(val filepath: String) extends Params - -/** - * Serving component that returns the first prediction with every item whose ID - * appears in the file at `params.filepath` removed. Each line of that file is - * treated as one item ID, and the file is re-read on every call to `serve`. - */ -class TempFilter(val params: TempFilterParams) - extends LServing[TempFilterParams, Query, Prediction] { - override def serve(query: Query, predictions: Seq[Prediction]): Prediction = { - // Source.fromFile keeps the file handle open until it is closed explicitly; - // toSet drains the iterator eagerly, so closing right after it is safe. - val source = Source.fromFile(params.filepath) - val disabledIids: Set[String] = - try source.getLines().toSet finally source.close() - - val prediction = predictions.head - // prediction.items is a list of (item_id, score)-tuple - prediction.copy(items = prediction.items.filter(e => !disabledIids(e._1))) - } -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-regression/README.md ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-regression/README.md b/examples/experimental/scala-local-regression/README.md deleted file mode 100644 index 5c0dafe..0000000 --- a/examples/experimental/scala-local-regression/README.md +++ /dev/null @@ -1,165 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one or more -contributor license agreements. See the NOTICE file distributed with -this work for additional information regarding copyright ownership.
-The ASF licenses this file to You under the Apache License, Version 2.0 -(the "License"); you may not use this file except in compliance with -the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. ---> - -Linear Regression Engine -======================== - -This document describes a Scala-based single-machine linear regression engine. - - -Prerequisite ------------- - -Make sure you have built PredictionIO and set up storage as described -[here](/README.md). - - -High Level Description ----------------------- - -This engine demonstrates how one can simply wrap around the -[Nak](https://github.com/scalanlp/nak) library to train a linear regression -model and serve real-time predictions. - -All code definitions can be found [here](Run.scala). - - -### Data Source - -Training data is located at `/examples/data/lr_data.txt`. The first column contains the -values of the dependent variable, and the rest are values of explanatory -variables. In this example, they are represented by the `TrainingData` case -class as a vector of double (all rows of the first column), and a vector of -vector of double (all rows of the remaining columns) respectively. - - -### Preparator - -The preparator in this example accepts two parameters: `n` and `k`. Each row of -data is indexed by `index` starting from 0. When `n > 0`, rows matching `index -mod n = k` will be dropped. - - -### Algorithm - -This example engine contains one single algorithm that wraps around the Nak -library's linear regression routine. The `train()` method simply massages the -`TrainingData` into a form that can be used by Nak.
- - -### Serving - -This example engine uses `FirstServing`, which serves only predictions from the -first algorithm. Since there is only one algorithm in this engine, predictions -from the linear regression algorithm will be served. - - -Training a Model ----------------- - -This example provides a set of ready-to-use parameters for each component -mentioned in the previous section. They are located inside the `params` -subdirectory. - -Before training, you must let PredictionIO know about the engine. Run the -following command to build and register the engine. -``` -$ cd $PIO_HOME/examples/scala-local-regression -$ ../../bin/pio build -``` -where `$PIO_HOME` is the root directory of the PredictionIO code tree. - -To start training, use the following command. -``` -$ cd $PIO_HOME/examples/scala-local-regression -$ ../../bin/pio train -``` -This will train a model and save it in PredictionIO's metadata storage. Notice -that when the run is completed, it will display a run ID, like below. -``` -2014-08-08 17:18:09,399 INFO SparkContext - Job finished: collect at DebugWorkflow.scala:571, took 0.046796 s -2014-08-08 17:18:09,399 INFO APIDebugWorkflow$ - Metrics is null. Stop here -2014-08-08 17:18:09,498 INFO APIDebugWorkflow$ - Saved engine instance with ID: CHURP-cvQta5VKxorx_9Aw -``` - - -Running Evaluation Metrics --------------------------- - -To run evaluation metrics, use the following command. -``` -$ cd $PIO_HOME/examples/scala-local-regression -$ ../../bin/pio eval --metrics-class org.apache.predictionio.controller.MeanSquareError -``` -Notice the extra required argument `--metrics-class -org.apache.predictionio.controller.MeanSquareError` for the `eval` command. This instructs -PredictionIO to run the specified metrics during evaluation. When you look at -the console output again, you should be able to see a mean square error -computed, like the following. 
-``` -2014-08-08 17:21:01,042 INFO APIDebugWorkflow$ - Set: The One Size: 1000 MSE: 0.092519 -2014-08-08 17:21:01,042 INFO APIDebugWorkflow$ - APIDebugWorkflow.run completed. -2014-08-08 17:21:01,140 INFO APIDebugWorkflow$ - Saved engine instance with ID: icfEp9njR76NQOrvowC-dQ -``` - - -Deploying a Real-time Prediction Server ---------------------------------------- - -Following from instructions above, you should have trained a model. Use the -following command to start a server. -``` -$ cd $PIO_HOME/examples/scala-local-regression -$ ../../bin/pio deploy -``` -This will create a server that by default binds to http://localhost:8000. You -can visit that page in your web browser to check its status. - -To perform real-time predictions, try the following. -``` -$ curl -H "Content-Type: application/json" -d '[2.1419053154730548, 1.919407948982788, 0.0501333631091041, -0.10699028639933772, 1.2809776380727795, 1.6846227956326554, 0.18277859260127316, -0.39664340267804343, 0.8090554869291249, 2.48621339239065]' http://localhost:8000/queries.json -$ curl -H "Content-Type: application/json" -d '[-0.8600615539670898, -1.0084357652346345, -1.3088407119560064, -1.9340485539299312, -0.6246990990796732, -2.325746651211032, -0.28429904752434976, -0.1272785164794058, -1.3787859877532718, -0.24374419289538318]' http://localhost:8000/queries.json -``` -Congratulations! You have just trained a linear regression model and are able to -perform real-time predictions. - - -Production Prediction Server Deployment ---------------------------------------- - -Prediction servers support reloading models on the fly with the latest completed -run. - -1. Assuming you already have a running prediction server from the previous - section, go to http://localhost:8000 to check its status. Take note of the - **Run ID** at the top. - -2. Run training and deploy again. - - ``` - $ cd $PIO_HOME/examples/scala-local-regression - $ ../../bin/pio train - $ ../../bin/pio deploy - ``` - -3.
Refresh the page at http://localhost:8000, you should see the prediction - server status page with a new **Run ID** at the top. - -Congratulations! You have just experienced a production-ready setup that can -reload itself automatically after every training! Simply add the training or -evaluation command to your *crontab*, and your setup will be able to re-deploy -itself automatically in a regular interval. http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-regression/Run.scala ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-regression/Run.scala b/examples/experimental/scala-local-regression/Run.scala deleted file mode 100644 index d9d3380..0000000 --- a/examples/experimental/scala-local-regression/Run.scala +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.predictionio.examples.regression.local - -import org.apache.predictionio.controller.EmptyParams -import org.apache.predictionio.controller.Engine -import org.apache.predictionio.controller.IEngineFactory -import org.apache.predictionio.controller.EngineParams -import org.apache.predictionio.controller.LFirstServing -import org.apache.predictionio.controller.LAlgorithm -import org.apache.predictionio.controller.LDataSource -import org.apache.predictionio.controller.LPreparator -import org.apache.predictionio.controller.MeanSquareError -import org.apache.predictionio.controller.Params -import org.apache.predictionio.controller.Utils -import org.apache.predictionio.controller.Workflow -import org.apache.predictionio.controller.WorkflowParams - -import breeze.linalg.DenseMatrix -import breeze.linalg.DenseVector -import breeze.linalg.inv -import nak.regress.LinearRegression -import org.json4s._ - -import scala.io.Source -import java.io.File - -case class DataSourceParams(val filepath: String, val seed: Int = 9527) - extends Params - -case class TrainingData(x: Vector[Vector[Double]], y: Vector[Double]) { - val r = x.length - val c = x.head.length -} - -case class LocalDataSource(val dsp: DataSourceParams) - extends LDataSource[ - DataSourceParams, String, TrainingData, Vector[Double], Double] { - override - def read(): Seq[(String, TrainingData, Seq[(Vector[Double], Double)])] = { - val lines = Source.fromFile(dsp.filepath).getLines - .toSeq.map(_.split(" ", 2)) - - // FIXME: Use different training / testing data. 
- val x = lines.map{ _(1).split(' ').map{_.toDouble} }.map{ e => Vector(e:_*)} - val y = lines.map{ _(0).toDouble } - - val td = TrainingData(Vector(x:_*), Vector(y:_*)) - - val oneData = ("The One", td, x.zip(y)) - return Seq(oneData) - } -} - -// When n = 0, don't drop data -// When n > 0, drop data when index mod n == k -case class PreparatorParams(n: Int = 0, k: Int = 0) extends Params - -/** - * Preparator that drops the rows whose zero-based index satisfies - * `index % n == k` and returns the remaining rows as a new TrainingData. - * With n == 0 no row is dropped. - */ -case class LocalPreparator(val pp: PreparatorParams = PreparatorParams()) - extends LPreparator[PreparatorParams, TrainingData, TrainingData] { - def prepare(td: TrainingData): TrainingData = { - val xyi: Vector[(Vector[Double], Double)] = td.x.zip(td.y) - .zipWithIndex - // n == 0 (the default) means keep every row; checking it first also avoids - // the ArithmeticException that `index % 0` would throw. - .filter{ e => pp.n == 0 || (e._2 % pp.n) != pp.k} - .map{ e => (e._1._1, e._1._2) } - TrainingData(xyi.map(_._1), xyi.map(_._2)) - } -} - -case class LocalAlgorithm() - extends LAlgorithm[ - EmptyParams, TrainingData, Array[Double], Vector[Double], Double] { - - def train(td: TrainingData): Array[Double] = { - val xArray: Array[Double] = td.x.foldLeft(Vector[Double]())(_ ++ _).toArray - // DenseMatrix.create fills first column, then second.
- val m = DenseMatrix.create[Double](td.c, td.r, xArray).t - val y = DenseVector[Double](td.y.toArray) - val result = LinearRegression.regress(m, y) - return result.data.toArray - } - - def predict(model: Array[Double], query: Vector[Double]) = { - model.zip(query).map(e => e._1 * e._2).sum - } - - @transient override lazy val querySerializer = - Utils.json4sDefaultFormats + new VectorSerializer -} - -/** - * json4s serializer mapping a JSON array of numbers to Vector[Double] and back. - * Array elements that are not numbers are read as 0.0. - */ -class VectorSerializer extends CustomSerializer[Vector[Double]](format => ( - { - case JArray(s) => - s.map { - case JDouble(x) => x - // JSON numbers without a fractional part are parsed as JInt, and as JDecimal - // when big-decimal parsing is enabled; convert them instead of zeroing them. - case JInt(x) => x.toDouble - case JDecimal(x) => x.toDouble - case _ => 0.0 - }.toVector - }, - { - case x: Vector[Double] => - JArray(x.toList.map(y => JDouble(y))) - } -)) - -object RegressionEngineFactory extends IEngineFactory { - def apply() = { - new Engine( - classOf[LocalDataSource], - classOf[LocalPreparator], - Map("" -> classOf[LocalAlgorithm]), - classOf[LFirstServing[Vector[Double], Double]]) - } -} - -object Run { - val workflowParams = WorkflowParams( - batch = "Imagine: Local Regression", - verbose = 3, - saveModel = true) - - def runComponents() { - val filepath = new File("../data/lr_data.txt").getCanonicalPath - val dataSourceParams = new DataSourceParams(filepath) - val preparatorParams = new PreparatorParams(n = 2, k = 0) - - Workflow.run( - params = workflowParams, - dataSourceClassOpt = Some(classOf[LocalDataSource]), - dataSourceParams = dataSourceParams, - preparatorClassOpt = Some(classOf[LocalPreparator]), - preparatorParams = preparatorParams, - algorithmClassMapOpt = Some(Map("" -> classOf[LocalAlgorithm])), - algorithmParamsList = Seq( - ("", EmptyParams())), - servingClassOpt = Some(classOf[LFirstServing[Vector[Double], Double]]), - evaluatorClassOpt = Some(classOf[MeanSquareError])) - } - - def runEngine() { - val filepath = new File("../data/lr_data.txt").getCanonicalPath - val engine = RegressionEngineFactory() - val engineParams = new EngineParams( - dataSourceParams = DataSourceParams(filepath), - preparatorParams = PreparatorParams(n = 2, k = 0), -
algorithmParamsList = Seq(("", EmptyParams()))) - - Workflow.runEngine( - params = workflowParams, - engine = engine, - engineParams = engineParams, - evaluatorClassOpt = Some(classOf[MeanSquareError])) - } - - def main(args: Array[String]) { - runEngine() - } -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-regression/build.sbt ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-regression/build.sbt b/examples/experimental/scala-local-regression/build.sbt deleted file mode 100644 index 4db53c6..0000000 --- a/examples/experimental/scala-local-regression/build.sbt +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import AssemblyKeys._ - -assemblySettings - -name := "example-scala-local-regression" - -organization := "org.apache.predictionio" - -libraryDependencies ++= Seq( - "org.apache.predictionio" %% "core" % "0.9.1" % "provided", - "org.apache.spark" %% "spark-core" % "1.2.0" % "provided", - "org.json4s" %% "json4s-native" % "3.2.10", - "org.scalanlp" %% "nak" % "1.3") http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-regression/engine.json ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-regression/engine.json b/examples/experimental/scala-local-regression/engine.json deleted file mode 100644 index eaa8f14..0000000 --- a/examples/experimental/scala-local-regression/engine.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "id": "default", - "description": "Default settings", - "engineFactory": "org.apache.predictionio.examples.regression.local.RegressionEngineFactory", - "datasource": { - "params": { - "filepath": "../data/lr_data.txt", - "seed": 1337 - } - }, - "preparator": { - "params": { - "n": 2, - "k": 0 - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/dec9f84c/examples/experimental/scala-local-regression/project/assembly.sbt ---------------------------------------------------------------------- diff --git a/examples/experimental/scala-local-regression/project/assembly.sbt b/examples/experimental/scala-local-regression/project/assembly.sbt deleted file mode 100644 index 54c3252..0000000 --- a/examples/experimental/scala-local-regression/project/assembly.sbt +++ /dev/null @@ -1 +0,0 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2")
