Fixed indentation
Deleted obsolete examples
Moved obsolete test suite to 'tests-obsolete' (port as we go)
Added tox test bootstrapping
Ignored PyCharm and test output
Removed obsolete (<0.8) support
PEP8/Flake8 + Python 2 & 3 support
Bumped version
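The tox.ini added by this commit (9 lines per the diffstat below) is not reproduced in this excerpt. As a rough idea of what a tox bootstrap consistent with the stated flake8 and Python 2 & 3 goals could look like — a hypothetical sketch only, with assumed interpreter targets and an assumed pytest runner, not the actual file contents:

    [tox]
    # assumed interpreter targets; the committed file may list different ones
    envlist = py27, py34, flake8

    [testenv]
    # run the (ported) test suite; pytest is an assumption here
    deps = pytest
    commands = py.test tests

    [testenv:flake8]
    # style checks matching the "PEP8/Flake8" item above
    deps = flake8
    commands = flake8 predictionio examples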
Project: http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/commit/bc678328 Tree: http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/tree/bc678328 Diff: http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/diff/bc678328 Branch: refs/heads/master Commit: bc6783282a198ea417e84c1bbc882aa41dd3ffae Parents: 7b66a7a Author: Filipe Varela <[email protected]> Authored: Tue Jan 13 17:29:45 2015 +0000 Committer: Filipe Varela <[email protected]> Committed: Thu Jan 15 12:20:40 2015 +0000 ---------------------------------------------------------------------- .gitignore | 4 + docs/source/conf.py | 108 +- examples/demo-movielens/appdata.py | 316 ++--- examples/demo-movielens/batch_import.py | 246 ++-- examples/event_sample.py | 50 +- examples/import_yahoo.py | 261 ++-- examples/itemrank_quick_query.py | 18 +- examples/itemrank_quick_start.py | 69 +- examples/obsolete/__init__.py | 0 examples/obsolete/itemrec/__init__.py | 0 examples/obsolete/itemrec/movies/.gitignore | 3 - examples/obsolete/itemrec/movies/README.md | 15 - examples/obsolete/itemrec/movies/__init__.py | 0 examples/obsolete/itemrec/movies/app_config.py | 2 - examples/obsolete/itemrec/movies/appdata.py | 148 --- .../obsolete/itemrec/movies/batch_import.py | 65 - .../obsolete/itemrec/movies/movie_rec_app.py | 152 --- predictionio/__init__.py | 809 ++++++------ predictionio/connection.py | 584 +++++---- predictionio/obsolete.py | 1205 ------------------ setup.py | 32 +- tests-obsolete/conversion_test.py | 16 + tests-obsolete/import_testdata.py | 46 + tests-obsolete/import_testdata_id_mismatch.py | 46 + tests-obsolete/import_testdata_special_char.py | 46 + tests-obsolete/predictionio_itemrec_test.py | 336 +++++ .../predictionio_itemrec_test_special_char.py | 366 ++++++ tests-obsolete/predictionio_itemsim_test.py | 200 +++ tests-obsolete/predictionio_test.py | 257 ++++ tests/.keep | 0 tests/conversion_test.py | 23 - tests/import_testdata.py | 52 - tests/import_testdata_id_mismatch.py | 52 - tests/import_testdata_special_char.py | 52 - tests/predictionio_itemrec_test.py | 300 ----- tests/predictionio_itemrec_test_special_char.py | 291 ----- tests/predictionio_itemsim_test.py | 160 --- tests/predictionio_test.py | 246 ---- tox.ini | 9 + 39 files changed, 2573 insertions(+), 4012 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/.gitignore ---------------------------------------------------------------------- diff --git a/.gitignore b/.gitignore index 66fa4a6..1822229 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +*.egg-info +.idea +.tox +.coverage *.pyc .project .pydevproject http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/docs/source/conf.py ---------------------------------------------------------------------- diff --git a/docs/source/conf.py b/docs/source/conf.py index cbcb864..539cf04 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -11,13 +11,13 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os +import sys # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
-#sys.path.insert(0, os.path.abspath('.')) -#sys.path.append("..") +# sys.path.insert(0, os.path.abspath('.')) +# sys.path.append("..") # Use path in the SDK. Hence need to override system package path by inserting # to position 0. sys.path.insert(0, "..") @@ -25,7 +25,7 @@ sys.path.insert(0, "..") # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. @@ -38,7 +38,7 @@ templates_path = ['_templates'] source_suffix = '.rst' # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' @@ -58,37 +58,37 @@ release = '0.8.3' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = [] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- @@ -100,72 +100,72 @@ html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # "<project> v<release> documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -#html_static_path = ['_static'] +# html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a <link> tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = 'PredictionIO-Python-SDKdoc' @@ -174,42 +174,42 @@ htmlhelp_basename = 'PredictionIO-Python-SDKdoc' # -- Options for LaTeX output -------------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', + # The paper size ('letterpaper' or 'a4paper'). + # 'papersize': 'letterpaper', -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', + # The font size ('10pt', '11pt' or '12pt'). + # 'pointsize': '10pt', -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # Additional stuff for the LaTeX preamble. + # 'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'PredictionIO-Python-SDK.tex', u'PredictionIO-Python-SDK Documentation', - u'TappingStone', 'manual'), + ('index', 'PredictionIO-Python-SDK.tex', u'PredictionIO-Python-SDK Documentation', + u'TappingStone', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. 
-#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output -------------------------------------------- @@ -222,27 +222,27 @@ man_pages = [ ] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, -# dir menu entry, description, category) +# dir menu entry, description, category) texinfo_documents = [ - ('index', 'PredictionIO-Python-SDK', u'PredictionIO-Python-SDK Documentation', - u'TappingStone', 'PredictionIO-Python-SDK', 'One line description of project.', - 'Miscellaneous'), + ('index', 'PredictionIO-Python-SDK', u'PredictionIO-Python-SDK Documentation', + u'TappingStone', 'PredictionIO-Python-SDK', 'One line description of project.', + 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' autodoc_member_order = 'bysource' http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/demo-movielens/appdata.py ---------------------------------------------------------------------- diff --git a/examples/demo-movielens/appdata.py b/examples/demo-movielens/appdata.py index d9637ef..354f31e 100644 --- a/examples/demo-movielens/appdata.py +++ b/examples/demo-movielens/appdata.py @@ -1,6 +1,5 @@ - import datetime -from operator import itemgetter, attrgetter +from operator import attrgetter # can get sample data here: # wget http://www.grouplens.org/system/files/ml-100k.zip @@ -15,169 +14,170 @@ RATE_ACTIONS_DELIMITER = "\t" class User: - def __init__(self, uid): - self.uid = uid - self.rec = [] # recommendations, list of iid + def __init__(self, uid): + self.uid = uid + self.rec = [] # recommendations, list of iid + + def __str__(self): + return "User[uid=%s,rec=%s]" % (self.uid, self.rec) - def __str__(self): - return "User[uid=%s,rec=%s]" % (self.uid, self.rec) class Item: - def __init__(self, iid, name, release_date, genres, year): - self.iid = iid - self.name = name - self.release_date = release_date # datetime.datetime object - self.genres = genres - self.year = year + def __init__(self, iid, name, release_date, genres, year): + self.iid = iid + self.name = name + self.release_date = release_date # datetime.datetime object + self.genres = genres + self.year = year + + def __str__(self): + return "Item[iid=%s,name=%s,release_date=%s,genres=%s]" % (self.iid, self.name, self.release_date, self.genres) - def __str__(self): - return "Item[iid=%s,name=%s,release_date=%s,genres=%s]" % (self.iid, self.name, self.release_date, self.genres) class RateAction: - def __init__(self, uid, iid, rating, t): - self.uid = uid - self.iid = iid - self.rating = rating - self.t = t + def __init__(self, uid, iid, rating, t): + self.uid = uid + self.iid = iid + self.rating = rating + self.t = t - def __str__(self): - return "RateAction[uid=%s,iid=%s,rating=%s,t=%s]" % (self.uid, self.iid, self.rating, self.t) + def __str__(self): + return "RateAction[uid=%s,iid=%s,rating=%s,t=%s]" % (self.uid, self.iid, 
self.rating, self.t) class AppData: - - def __init__(self): - self._users = {} # dict of User obj - self._items = {} # dict of Item obj - self._rate_actions = [] # list of RateAction obj - - self._users_file = "%s/%s" % (APPDATA_DIRNAME, USERS_FILENAME) - self._items_file = "%s/%s" % (APPDATA_DIRNAME, ITEMS_FILENAME) - self._rate_actions_file = "%s/%s" % (APPDATA_DIRNAME, RATE_ACTIONS_FILENAME) - self.__init_users() - self.__init_items() - self.__init_rate_actions() - - def __init_users(self): - """ - uid| - """ - print "[Info] Initializing users..." - f = open(self._users_file, 'r') - for line in f: - data = line.rstrip('\r\n').split(USERS_FILE_DELIMITER) - self.add_user(User(data[0])) - f.close() - print "[Info] %s users were initialized." % len(self._users) - - def __init_items(self): - """ - movie id | movie title | release date | video release date | - IMDb URL | unknown | Action | Adventure | Animation | - Children's | Comedy | Crime | Documentary | Drama | Fantasy | - Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi | - Thriller | War | Western | - The last 19 fields are the genres, a 1 indicates the movie - is of that genre, a 0 indicates it is not; movies can be in - several genres at once. - - """ - genre_names = [ "unknown", "Action", "Adventure", "Animation", - "Children's", "Comedy", "Crime", "Documentary", "Drama", "Fantasy", - "Film-Noir", "Horror", "Musical", "Mystery", "Romance", "Sci-Fi", - "Thriller", "War", "Western"] - - print "[Info] Initializing items..." - f = open(self._items_file, 'r') - for line in f: - data = line.rstrip('\r\n').split(ITEMS_FILE_DELIMITER) - genres_flags = data[5:24] - - genres = () # tuple of genres - for g,flag in zip(genre_names, genres_flags): - if flag == '1': - genres = genres + (g,) - - try: - # eg. 01-Jan-1994 - release_date = datetime.datetime.strptime(data[2], "%d-%b-%Y").replace(microsecond=1) - (day, month, year) = data[2].split('-') - except: - print "[Note] item %s %s doesn't have release date. Skip it." % (data[0], data[1]) - else: - self.add_item(Item( - iid=data[0], - name=data[1], - release_date=release_date, - genres=genres, - year=year)) - f.close() - print "[Info] %s items were initialized." % len(self._items) - - def __init_rate_actions(self): - """ - uid|iid|rating|timestamp - """ - print "[Info] Initializing rate actions..." - f = open(self._rate_actions_file, 'r') - for line in f: - data = line.rstrip('\r\n').split(RATE_ACTIONS_DELIMITER) - t = datetime.datetime.utcfromtimestamp(int(data[3])).replace(microsecond=1) - self.add_rate_action(RateAction(data[0], data[1], data[2], t)) - f.close() - print "[Info] %s rate actions were initialized." 
% len(self._rate_actions) - - def add_user(self, user): - self._users[user.uid] = user - - def add_item(self, item): - self._items[item.iid] = item - - def add_rate_action(self, action): - self._rate_actions.append(action) - - def get_users(self): - return self._users - - def get_items(self): - return self._items - - def get_rate_actions(self): - return self._rate_actions - - def get_user(self, uid): - """return single user - """ - if uid in self._users: - return self._users[uid] - else: - return None - - def get_item(self, iid): - """return single item - """ - if iid in self._items: - return self._items[iid] - else: - return None - - def get_top_rated_items(self, uid, n): - """get top n rated iids by this uid - """ - if uid in self._users: - actions = filter(lambda u: u.uid==uid, self._rate_actions) - top = sorted(actions, key=attrgetter('rating'), reverse=True) - topn_iids = map(lambda a: a.iid, top[:n]) - return topn_iids - else: - return None - - def get_top_rate_actions(self, uid, n): - """get top n rated actions by this uid - """ - if uid in self._users: - actions = filter(lambda u: u.uid==uid, self._rate_actions) - top = sorted(actions, key=attrgetter('rating'), reverse=True) - return top[:n] - else: - return None + def __init__(self): + self._users = {} # dict of User obj + self._items = {} # dict of Item obj + self._rate_actions = [] # list of RateAction obj + + self._users_file = "%s/%s" % (APPDATA_DIRNAME, USERS_FILENAME) + self._items_file = "%s/%s" % (APPDATA_DIRNAME, ITEMS_FILENAME) + self._rate_actions_file = "%s/%s" % (APPDATA_DIRNAME, RATE_ACTIONS_FILENAME) + self.__init_users() + self.__init_items() + self.__init_rate_actions() + + def __init_users(self): + """ + uid| + """ + print("[Info] Initializing users...") + f = open(self._users_file, 'r') + for line in f: + data = line.rstrip('\r\n').split(USERS_FILE_DELIMITER) + self.add_user(User(data[0])) + f.close() + print("[Info] %s users were initialized." % len(self._users)) + + def __init_items(self): + """ + movie id | movie title | release date | video release date | + IMDb URL | unknown | Action | Adventure | Animation | + Children's | Comedy | Crime | Documentary | Drama | Fantasy | + Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi | + Thriller | War | Western | + The last 19 fields are the genres, a 1 indicates the movie + is of that genre, a 0 indicates it is not; movies can be in + several genres at once. + + """ + genre_names = ["unknown", "Action", "Adventure", "Animation", + "Children's", "Comedy", "Crime", "Documentary", "Drama", "Fantasy", + "Film-Noir", "Horror", "Musical", "Mystery", "Romance", "Sci-Fi", + "Thriller", "War", "Western"] + + print("[Info] Initializing items...") + f = open(self._items_file, 'r') + for line in f: + data = line.rstrip('\r\n').split(ITEMS_FILE_DELIMITER) + genres_flags = data[5:24] + + genres = () # tuple of genres + for g, flag in zip(genre_names, genres_flags): + if flag == '1': + genres = genres + (g,) + + try: + # eg. 01-Jan-1994 + release_date = datetime.datetime.strptime(data[2], "%d-%b-%Y").replace(microsecond=1) + (day, month, year) = data[2].split('-') + except: + print("[Note] item %s %s doesn't have release date. Skip it." % (data[0], data[1])) + else: + self.add_item(Item( + iid=data[0], + name=data[1], + release_date=release_date, + genres=genres, + year=year)) + f.close() + print("[Info] %s items were initialized." 
% len(self._items)) + + def __init_rate_actions(self): + """ + uid|iid|rating|timestamp + """ + print("[Info] Initializing rate actions...") + f = open(self._rate_actions_file, 'r') + for line in f: + data = line.rstrip('\r\n').split(RATE_ACTIONS_DELIMITER) + t = datetime.datetime.utcfromtimestamp(int(data[3])).replace(microsecond=1) + self.add_rate_action(RateAction(data[0], data[1], data[2], t)) + f.close() + print("[Info] %s rate actions were initialized." % len(self._rate_actions)) + + def add_user(self, user): + self._users[user.uid] = user + + def add_item(self, item): + self._items[item.iid] = item + + def add_rate_action(self, action): + self._rate_actions.append(action) + + def get_users(self): + return self._users + + def get_items(self): + return self._items + + def get_rate_actions(self): + return self._rate_actions + + def get_user(self, uid): + """return single user + """ + if uid in self._users: + return self._users[uid] + else: + return None + + def get_item(self, iid): + """return single item + """ + if iid in self._items: + return self._items[iid] + else: + return None + + def get_top_rated_items(self, uid, n): + """get top n rated iids by this uid + """ + if uid in self._users: + actions = filter(lambda u: u.uid == uid, self._rate_actions) + top = sorted(actions, key=attrgetter('rating'), reverse=True) + topn_iids = map(lambda a: a.iid, top[:n]) + return topn_iids + else: + return None + + def get_top_rate_actions(self, uid, n): + """get top n rated actions by this uid + """ + if uid in self._users: + actions = filter(lambda u: u.uid == uid, self._rate_actions) + top = sorted(actions, key=attrgetter('rating'), reverse=True) + return top[:n] + else: + return None http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/demo-movielens/batch_import.py ---------------------------------------------------------------------- diff --git a/examples/demo-movielens/batch_import.py b/examples/demo-movielens/batch_import.py index f0fcb7c..af9c4f5 100644 --- a/examples/demo-movielens/batch_import.py +++ b/examples/demo-movielens/batch_import.py @@ -1,133 +1,135 @@ +import sys +import datetime + +import pytz from appdata import AppData import predictionio -import sys -import pytz -import datetime + def batch_import_task(app_data, client, all_info=False): - # event_time is an important properties used by the PredictionIO platform. It - # is particularly useful in generating training and testing set, which uses - # event_time for splitting. Hence, when we import data, better to make the - # event_time as approximate to fact as possible. - # - # However, in many cases, the data doesn't come with a time. Movie-lens' user - # data, for example, only reveals the age, gender, occupation, and zip code of - # a user. It doesn't report when the user is "created". Likewise, for items, - # it only reports the release date. - # - # To remedy this problem, we have to make some assumptions to the data. In - # this import script, the event_time for user is set to epoch=0, and the - # event_time for item is set to the release_date + 00:00:00 UTC. - - print "[Info] Importing users to PredictionIO..." - user_create_time = datetime.datetime.fromtimestamp(0, tz=pytz.utc) - count = 0 - set_user_request_list = [] - for k, v in app_data.get_users().iteritems(): - count += 1 - if all_info: - print "[Info] Importing %s..." 
% v - else: - if (count % 32 == 0): - sys.stdout.write('\r[Info] %s' % count) - sys.stdout.flush() - - set_user_request_list.append( - client.aset_user(uid=v.uid, event_time=user_create_time)) - - [r.get_response() for r in set_user_request_list] - sys.stdout.write('\r[Info] %s users were imported.\n' % count) - sys.stdout.flush() - - print "[Info] Importing items to PredictionIO..." - count = 0 - set_item_request_list = [] - # event_time is a datetime, hence need to add a time component to the release - # date. - midnight_utc = datetime.time(0, 0, 0, tzinfo=pytz.utc) - epoch = datetime.datetime.fromtimestamp(0, tz=pytz.utc) - for k, v in app_data.get_items().iteritems(): - count += 1 - if all_info: - print "[Info] Importing %s..." % v - else: - if (count % 32 == 0): - sys.stdout.write('\r[Info] %s' % count) - sys.stdout.flush() - - itypes = ("movie",) + v.genres - - release_datetime = datetime.datetime.combine( - v.release_date, - midnight_utc) - - # event_time must be after epoch. - event_time = release_datetime if release_datetime > epoch else epoch - - utf8_name = v.name.decode('utf-8', 'ignore') - - set_item_request = client.aset_item( - iid=v.iid, - event_time=event_time, - properties={ - "itypes": list(itypes), - "starttime": release_datetime.isoformat(), - "name": utf8_name, - "year": v.year } ) - - set_item_request_list.append(set_item_request) - - [r.get_response() for r in set_item_request_list] - sys.stdout.write('\r[Info] %s items were imported.\n' % count) - sys.stdout.flush() - - print "[Info] Importing rate actions to PredictionIO..." - count = 0 - create_event_request_list = [] - for v in app_data.get_rate_actions(): - count += 1 - if all_info: - print "[Info] Importing %s..." % v - else: - if (count % 32 == 0): - sys.stdout.write('\r[Info] %s' % count) - sys.stdout.flush() - - properties = { "rating" : int(v.rating) } - req = client.acreate_event( - event="rate", - entity_type="user", - entity_id=v.uid, - target_entity_type="item", - target_entity_id=v.iid, - properties=properties, - event_time=v.t.replace(tzinfo=pytz.utc), + # event_time is an important properties used by the PredictionIO platform. It + # is particularly useful in generating training and testing set, which uses + # event_time for splitting. Hence, when we import data, better to make the + # event_time as approximate to fact as possible. + # + # However, in many cases, the data doesn't come with a time. Movie-lens' user + # data, for example, only reveals the age, gender, occupation, and zip code of + # a user. It doesn't report when the user is "created". Likewise, for items, + # it only reports the release date. + # + # To remedy this problem, we have to make some assumptions to the data. In + # this import script, the event_time for user is set to epoch=0, and the + # event_time for item is set to the release_date + 00:00:00 UTC. + + print("[Info] Importing users to PredictionIO...") + user_create_time = datetime.datetime.fromtimestamp(0, tz=pytz.utc) + count = 0 + set_user_request_list = [] + for k, v in app_data.get_users().iteritems(): + count += 1 + if all_info: + print("[Info] Importing %s..." 
% v) + else: + if count % 32 == 0: + sys.stdout.write('\r[Info] %s' % count) + sys.stdout.flush() + + set_user_request_list.append( + client.aset_user(uid=v.uid, event_time=user_create_time)) + + [r.get_response() for r in set_user_request_list] + sys.stdout.write('\r[Info] %s users were imported.\n' % count) + sys.stdout.flush() + + print("[Info] Importing items to PredictionIO...") + count = 0 + set_item_request_list = [] + # event_time is a datetime, hence need to add a time component to the release + # date. + midnight_utc = datetime.time(0, 0, 0, tzinfo=pytz.utc) + epoch = datetime.datetime.fromtimestamp(0, tz=pytz.utc) + for k, v in app_data.get_items().iteritems(): + count += 1 + if all_info: + print("[Info] Importing %s..." % v) + else: + if count % 32 == 0: + sys.stdout.write('\r[Info] %s' % count) + sys.stdout.flush() + + itypes = ("movie",) + v.genres + + release_datetime = datetime.datetime.combine( + v.release_date, + midnight_utc) + + # event_time must be after epoch. + event_time = release_datetime if release_datetime > epoch else epoch + + utf8_name = v.name.decode('utf-8', 'ignore') + + set_item_request = client.aset_item( + iid=v.iid, + event_time=event_time, + properties={ + "itypes": list(itypes), + "starttime": release_datetime.isoformat(), + "name": utf8_name, + "year": v.year}) + + set_item_request_list.append(set_item_request) + + [r.get_response() for r in set_item_request_list] + sys.stdout.write('\r[Info] %s items were imported.\n' % count) + sys.stdout.flush() + + print("[Info] Importing rate actions to PredictionIO...") + count = 0 + create_event_request_list = [] + for v in app_data.get_rate_actions(): + count += 1 + if all_info: + print("[Info] Importing %s..." % v) + else: + if count % 32 == 0: + sys.stdout.write('\r[Info] %s' % count) + sys.stdout.flush() + + properties = {"rating": int(v.rating)} + req = client.acreate_event( + event="rate", + entity_type="user", + entity_id=v.uid, + target_entity_type="item", + target_entity_id=v.iid, + properties=properties, + event_time=v.t.replace(tzinfo=pytz.utc), ) - create_event_request_list.append(req) + create_event_request_list.append(req) - [r.get_response() for r in create_event_request_list] - sys.stdout.write('\r[Info] %s rate actions were imported.\n' % count) - sys.stdout.flush() + [r.get_response() for r in create_event_request_list] + sys.stdout.write('\r[Info] %s rate actions were imported.\n' % count) + sys.stdout.flush() if __name__ == '__main__': - if len(sys.argv) < 3: - sys.exit("Usage: python -m examples.demo-movielens.batch_import " - "<access_key> <url>") - - access_key = sys.argv[1] - - client = predictionio.EventClient( - access_key=access_key, - url=sys.argv[2], - threads=5, - qsize=500) - - # Test connection - print "Status:", client.get_status() - - app_data = AppData() - batch_import_task(app_data, client) - client.close() + if len(sys.argv) < 3: + sys.exit("Usage: python -m examples.demo-movielens.batch_import " + "<access_key> <url>") + + access_key = sys.argv[1] + + client = predictionio.EventClient( + access_key=access_key, + url=sys.argv[2], + threads=5, + qsize=500) + + # Test connection + print("Status:", client.get_status()) + + app_data = AppData() + batch_import_task(app_data, client) + client.close() http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/event_sample.py ---------------------------------------------------------------------- diff --git a/examples/event_sample.py b/examples/event_sample.py index 0a7a339..b76bcf7 100644 --- 
a/examples/event_sample.py +++ b/examples/event_sample.py @@ -1,10 +1,12 @@ -from predictionio import EventClient -from predictionio import NotFoundError from datetime import datetime + import pytz -import sys -access_key = None +from predictionio import EventClient +from predictionio import NotFoundError + + +access_key = "gDx1XuMUC9vu1YWWPRZkLRTftoq7m73mlj2MtnZEjncPlZ1JxUS2s7oajwP9xrZQ" assert access_key is not None, "Please create an access key with 'pio app new'" client = EventClient(access_key=access_key, url="http://localhost:7070") @@ -15,13 +17,13 @@ print(client.get_status()) # First event first_event_properties = { - "prop1" : 1, - "prop2" : "value2", - "prop3" : [1, 2, 3], - "prop4" : True, - "prop5" : ["a", "b", "c"], - "prop6" : 4.56 , - } + "prop1": 1, + "prop2": "value2", + "prop3": [1, 2, 3], + "prop4": True, + "prop5": ["a", "b", "c"], + "prop6": 4.56, +} first_event_time = datetime( 2004, 12, 13, 21, 39, 45, 618000, pytz.timezone('US/Mountain')) first_event_response = client.create_event( @@ -30,16 +32,16 @@ first_event_response = client.create_event( entity_id="uid", properties=first_event_properties, event_time=first_event_time, - ) +) print("First Event response") print(first_event_response) -print +print() # Second event second_event_properties = { - "someProperty" : "value1", - "anotherProperty" : "value2", - } + "someProperty": "value1", + "anotherProperty": "value2", +} second_event_response = client.create_event( event="my_event", entity_type="user", @@ -50,7 +52,7 @@ second_event_response = client.create_event( event_time=datetime(2014, 12, 13, 21, 38, 45, 618000, pytz.utc)) print("Second Event response") print(second_event_response) -print +print() # Get the first event from Event Server @@ -58,21 +60,21 @@ first_event_id = first_event_response.json_body["eventId"] print("Get Event") event = client.get_event(first_event_id) print(event) -print +print() # Delete the first event from Event Server print("Delete Event") delete_response = client.delete_event(first_event_id) print(delete_response) -print +print() # Delete the first event from Event Server again should yield exception. print("Delete Event Again") try: - delete_response = client.delete_event(first_event_id) -except NotFoundError, ex: - print("The expected error: {0}".format(ex)) -print + delete_response = client.delete_event(first_event_id) +except NotFoundError as ex: + print("The expected error: {0}".format(ex)) +print() # "user"-helper methods @@ -98,7 +100,7 @@ print(client.unset_user("foo", properties=foo_properties)) print("Delete user") print(client.delete_user("foo")) -# The SDK also support specifying the eventTime. It is useful for importing +# The SDK also support specifying the eventTime. It is useful for importing # events happened in the past. foo_time = datetime(2014, 8, 31, 4, 56, tzinfo=pytz.timezone('US/Pacific')) print("Create user at " + str(foo_time)) http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/import_yahoo.py ---------------------------------------------------------------------- diff --git a/examples/import_yahoo.py b/examples/import_yahoo.py index 93b367d..584a8c9 100644 --- a/examples/import_yahoo.py +++ b/examples/import_yahoo.py @@ -3,13 +3,14 @@ Import historical stock data from yahoo finance. 
""" from datetime import datetime +import sys + +import pytz + from pandas.io import data as pdata -import argparse import numpy import predictionio -import pytz -import sys -import time + EPOCH = datetime(1970, 1, 1, tzinfo=pytz.utc) @@ -63,151 +64,151 @@ SP500_LIST = [ "WU", "WY", "WYN", "WYNN", "X", "XEL", "XL", "XLNX", "XOM", "XRAY", "XRX", "XYL", "YHOO", "YUM", "ZION", "ZMH", "ZTS"] -ETF_LIST = ["QQQ", "SPY", "XLY", "XLP", "XLE", "XLF", "XLV", - "XLI", "XLB", "XLK", "XLU"] +ETF_LIST = ["QQQ", "SPY", "XLY", "XLP", "XLE", "XLF", "XLV", + "XLI", "XLB", "XLK", "XLU"] def since_epoch(dt): - return (dt - EPOCH).total_seconds() + return (dt - EPOCH).total_seconds() def import_data(client, access_key, ticker, start_time, end_time, event_time): - print "Importing:", ticker, start_time, end_time - - try: - df = pdata.DataReader(ticker, 'yahoo', start_time, end_time) - print "Extracted:", df.index[0], df.index[-1] - except IOError, ex: - print ex - print "Data not exist. Returning" - return - - # assume we only extract US data - eastern = pytz.timezone('US/Eastern') - - columns = [ - ('Open', 'open'), - ('High', 'high'), - ('Low', 'low'), - ('Close', 'close'), - ('Volume', 'volume'), - ('Adj Close', 'adjclose')] - - yahoo_data = dict() - yahoo_data['ticker'] = ticker - yahoo_data['t'] = [ - # hour=16 to indicate market close time - since_epoch(eastern.localize(date_.to_pydatetime().replace(hour=16))) - for date_ in df.index] - - for column in columns: - yahoo_data[column[1]] = map(numpy.asscalar, df[column[0]].values) - - properties = {'yahoo': yahoo_data} - - response = client.create_event( - event='$set', - entity_type='yahoo', - entity_id=ticker, - properties=properties, - event_time=event_time.replace(tzinfo=pytz.utc)) - - print(response) + print("Importing:", ticker, start_time, end_time) + + try: + df = pdata.DataReader(ticker, 'yahoo', start_time, end_time) + print("Extracted:", df.index[0], df.index[-1]) + except IOError as ex: + print(ex) + print("Data not exist. 
Returning") + return + + # assume we only extract US data + eastern = pytz.timezone('US/Eastern') + + columns = [ + ('Open', 'open'), + ('High', 'high'), + ('Low', 'low'), + ('Close', 'close'), + ('Volume', 'volume'), + ('Adj Close', 'adjclose')] + + yahoo_data = dict() + yahoo_data['ticker'] = ticker + yahoo_data['t'] = [ + # hour=16 to indicate market close time + since_epoch(eastern.localize(date_.to_pydatetime().replace(hour=16))) + for date_ in df.index] + + for column in columns: + yahoo_data[column[1]] = map(numpy.asscalar, df[column[0]].values) + + properties = {'yahoo': yahoo_data} + + response = client.create_event( + event='$set', + entity_type='yahoo', + entity_id=ticker, + properties=properties, + event_time=event_time.replace(tzinfo=pytz.utc)) + + print(response) def import_all(access_key): - """This method import all SP500 stocks and some SPDR ETFs.""" - time_slices = [ - (datetime(1999, 1, 1), datetime(2004, 1, 1), datetime(2004, 1, 2)), - (datetime(2003, 12, 1), datetime(2009, 1, 1), datetime(2009, 1, 2)), - (datetime(2008, 12, 1), datetime(2014, 9, 1), datetime(2014, 9, 2)), - ] + """This method import all SP500 stocks and some SPDR ETFs.""" + time_slices = [ + (datetime(1999, 1, 1), datetime(2004, 1, 1), datetime(2004, 1, 2)), + (datetime(2003, 12, 1), datetime(2009, 1, 1), datetime(2009, 1, 2)), + (datetime(2008, 12, 1), datetime(2014, 9, 1), datetime(2014, 9, 2)), + ] - url = 'http://localhost:7070' - client = predictionio.EventClient(access_key=access_key, threads=1, url=url) + url = 'http://localhost:7070' + client = predictionio.EventClient(access_key=access_key, threads=1, url=url) - tickers = SP500_LIST + ETF_LIST + tickers = SP500_LIST + ETF_LIST - for ticker in tickers: - for time_slice in time_slices: - import_data(client, access_key, ticker, - time_slice[0], time_slice[1], time_slice[2]) + for ticker in tickers: + for time_slice in time_slices: + import_data(client, access_key, ticker, + time_slice[0], time_slice[1], time_slice[2]) def import_data_with_gaps(access_key): - """This method import data with time gaps. - - Data imported by this method is used by stock engine, it demonsrates how it - can handle time series data with gaps. 
- """ - - # time_slices is discontinuted - # startTime, endTime, eventDate - time_slices = [ - (datetime(2013, 12, 1), datetime(2014, 2, 1), datetime(2014, 2, 2)), - (datetime(2014, 1, 1), datetime(2014, 1, 20), datetime(2014, 2, 10)), - (datetime(2014, 1, 10), datetime(2014, 2, 20), datetime(2014, 2, 28)), - (datetime(2014, 2, 10), datetime(2014, 3, 31), datetime(2014, 4, 2)), - (datetime(2014, 5, 1), datetime(2014, 6, 15), datetime(2014, 6, 20)), - (datetime(2014, 6, 1), datetime(2014, 7, 1), datetime(2014, 7, 15)), - ] - - tickers = ['SPY', 'AAPL', 'IBM', 'MSFT'] - - url = 'http://localhost:7070' - client = predictionio.EventClient(access_key=access_key, threads=1, url=url) - - for ticker in tickers: - for time_slice in time_slices: - import_data(client, access_key, ticker, - time_slice[0], time_slice[1], time_slice[2]) - - # below are data with holes - time_slices = [ - (datetime(2014, 1, 1), datetime(2014, 1, 20), datetime(2014, 2, 10)), - (datetime(2014, 2, 10), datetime(2014, 3, 31), datetime(2014, 4, 2)), - (datetime(2014, 6, 1), datetime(2014, 7, 1), datetime(2014, 7, 15)), - ] - - tickers = ['AMZN'] - for ticker in tickers: - for time_slice in time_slices: - import_data(client, access_key, ticker, - time_slice[0], time_slice[1], time_slice[2]) - - time_slices = [ - (datetime(2014, 1, 10), datetime(2014, 2, 20), datetime(2014, 2, 28)), - (datetime(2014, 2, 10), datetime(2014, 3, 31), datetime(2014, 4, 2)), - ] - tickers = ['FB'] - for ticker in tickers: - for time_slice in time_slices: - import_data(client, access_key, ticker, - time_slice[0], time_slice[1], time_slice[2]) + """This method import data with time gaps. + + Data imported by this method is used by stock engine, it demonsrates how it + can handle time series data with gaps. + """ + + # time_slices is discontinuted + # startTime, endTime, eventDate + time_slices = [ + (datetime(2013, 12, 1), datetime(2014, 2, 1), datetime(2014, 2, 2)), + (datetime(2014, 1, 1), datetime(2014, 1, 20), datetime(2014, 2, 10)), + (datetime(2014, 1, 10), datetime(2014, 2, 20), datetime(2014, 2, 28)), + (datetime(2014, 2, 10), datetime(2014, 3, 31), datetime(2014, 4, 2)), + (datetime(2014, 5, 1), datetime(2014, 6, 15), datetime(2014, 6, 20)), + (datetime(2014, 6, 1), datetime(2014, 7, 1), datetime(2014, 7, 15)), + ] + + tickers = ['SPY', 'AAPL', 'IBM', 'MSFT'] + + url = 'http://localhost:7070' + client = predictionio.EventClient(access_key=access_key, threads=1, url=url) + + for ticker in tickers: + for time_slice in time_slices: + import_data(client, access_key, ticker, + time_slice[0], time_slice[1], time_slice[2]) + + # below are data with holes + time_slices = [ + (datetime(2014, 1, 1), datetime(2014, 1, 20), datetime(2014, 2, 10)), + (datetime(2014, 2, 10), datetime(2014, 3, 31), datetime(2014, 4, 2)), + (datetime(2014, 6, 1), datetime(2014, 7, 1), datetime(2014, 7, 15)), + ] + + tickers = ['AMZN'] + for ticker in tickers: + for time_slice in time_slices: + import_data(client, access_key, ticker, + time_slice[0], time_slice[1], time_slice[2]) + + time_slices = [ + (datetime(2014, 1, 10), datetime(2014, 2, 20), datetime(2014, 2, 28)), + (datetime(2014, 2, 10), datetime(2014, 3, 31), datetime(2014, 4, 2)), + ] + tickers = ['FB'] + for ticker in tickers: + for time_slice in time_slices: + import_data(client, access_key, ticker, + time_slice[0], time_slice[1], time_slice[2]) def import_one(access_key): - """Import TSLA. - - Import data with from 2014-01-01 until 2014-03-01. event_time specifies when - this data is extracted. 
- """ - start_time = datetime(2014, 1, 1) - end_time = datetime(2014, 3, 1) - event_time = datetime(2014, 9, 1) - ticker = 'TSLA' - - url = 'http://localhost:7070' - client = predictionio.EventClient(access_key=access_key, threads=1, url=url) - - import_data(client, access_key, ticker, start_time, end_time, event_time) + """Import TSLA. + + Import data with from 2014-01-01 until 2014-03-01. event_time specifies when + this data is extracted. + """ + start_time = datetime(2014, 1, 1) + end_time = datetime(2014, 3, 1) + event_time = datetime(2014, 9, 1) + ticker = 'TSLA' + + url = 'http://localhost:7070' + client = predictionio.EventClient(access_key=access_key, threads=1, url=url) + + import_data(client, access_key, ticker, start_time, end_time, event_time) if __name__ == '__main__': - if len(sys.argv) < 2: - sys.exit("Usage: python -m examples.import_yahoo <access_key>") + if len(sys.argv) < 2: + sys.exit("Usage: python -m examples.import_yahoo <access_key>") - access_key = sys.argv[1] - import_all(access_key=access_key) - #import_data_with_gaps(access_key=access_key) - #import_one(access_key=access_key) + access_key = sys.argv[1] + import_all(access_key=access_key) + # import_data_with_gaps(access_key=access_key) + # import_one(access_key=access_key) http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/itemrank_quick_query.py ---------------------------------------------------------------------- diff --git a/examples/itemrank_quick_query.py b/examples/itemrank_quick_query.py index 3c43713..2beb29b 100644 --- a/examples/itemrank_quick_query.py +++ b/examples/itemrank_quick_query.py @@ -10,14 +10,14 @@ client = predictionio.EngineClient("http://localhost:8000") item_ids = [str(i) for i in range(1, 6)] user_ids = [str(x) for x in range(1, 6)] + ["NOT_EXIST_USER"] for user_id in user_ids: - print "Rank item 1 to 5 for user", user_id - try: - response = client.send_query({ - "uid": user_id, - "iids": item_ids - }) - print response - except predictionio.PredictionIOAPIError as e: - print 'Caught exception:', e + print("Rank item 1 to 5 for user ", user_id) + try: + response = client.send_query({ + "uid": user_id, + "iids": item_ids + }) + print(response) + except predictionio.PredictionIOAPIError as e: + print("Caught exception: ", e) client.close() http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/itemrank_quick_start.py ---------------------------------------------------------------------- diff --git a/examples/itemrank_quick_start.py b/examples/itemrank_quick_start.py index 6333ea0..a231373 100644 --- a/examples/itemrank_quick_start.py +++ b/examples/itemrank_quick_start.py @@ -2,44 +2,47 @@ itemrank quickstart import data """ -import predictionio - import random import sys +import predictionio + + def import_itemrank(access_key): + random.seed() + + client = predictionio.EventClient(access_key) + + print(client.get_status()) + + # generate 10 users, with user ids 1,2,....,10 + user_ids = [str(i) for i in range(1, 11)] + for user_id in user_ids: + print + "Set user", user_id + client.set_user(user_id) + + # generate 50 items, with item ids 1,2,....,50 + # assign type id 1 to all of them + item_ids = [str(i) for i in range(1, 51)] + for item_id in item_ids: + print + "Set item", item_id + client.set_item(item_id, { + "itypes": ['1'] + }) + + # each user randomly views 10 items + for user_id in user_ids: + for viewed_item in random.sample(item_ids, 10): + print + "User", user_id, "views item", viewed_item 
+ client.record_user_action_on_item("view", user_id, viewed_item) - random.seed() - - client = predictionio.EventClient(access_key) - - print client.get_status() - - # generate 10 users, with user ids 1,2,....,10 - user_ids = [str(i) for i in range(1, 11)] - for user_id in user_ids: - print "Set user", user_id - client.set_user(user_id) - - # generate 50 items, with item ids 1,2,....,50 - # assign type id 1 to all of them - item_ids = [str(i) for i in range(1, 51)] - for item_id in item_ids: - print "Set item", item_id - client.set_item(item_id, { - "itypes" : ['1'] - }) - - # each user randomly views 10 items - for user_id in user_ids: - for viewed_item in random.sample(item_ids, 10): - print "User", user_id ,"views item", viewed_item - client.record_user_action_on_item("view", user_id, viewed_item) - - client.close() + client.close() if __name__ == '__main__': - if len(sys.argv) < 2: - sys.exit("Usage: python -m examples.itemrank_quick_start <access_key>") - import_itemrank(sys.argv[1]) + if len(sys.argv) < 2: + sys.exit("Usage: python -m examples.itemrank_quick_start <access_key>") + import_itemrank(sys.argv[1]) http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/obsolete/__init__.py ---------------------------------------------------------------------- diff --git a/examples/obsolete/__init__.py b/examples/obsolete/__init__.py deleted file mode 100644 index e69de29..0000000 http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/obsolete/itemrec/__init__.py ---------------------------------------------------------------------- diff --git a/examples/obsolete/itemrec/__init__.py b/examples/obsolete/itemrec/__init__.py deleted file mode 100644 index e69de29..0000000 http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/obsolete/itemrec/movies/.gitignore ---------------------------------------------------------------------- diff --git a/examples/obsolete/itemrec/movies/.gitignore b/examples/obsolete/itemrec/movies/.gitignore deleted file mode 100644 index 7b1ef3e..0000000 --- a/examples/obsolete/itemrec/movies/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -test/ -ml-100k/ -ml-100k.zip http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/obsolete/itemrec/movies/README.md ---------------------------------------------------------------------- diff --git a/examples/obsolete/itemrec/movies/README.md b/examples/obsolete/itemrec/movies/README.md deleted file mode 100644 index 9ef357a..0000000 --- a/examples/obsolete/itemrec/movies/README.md +++ /dev/null @@ -1,15 +0,0 @@ -PredictionIO Python SDK Example -=============================== - -Please execute all commands from repository root. - -Step 1. Get sample data and unzip it. - - > wget http://www.grouplens.org/system/files/ml-100k.zip - > unzip ml-100k.zip - -Step 2. Configurate examples/itemrec/movies/appdata.py - -Step 3. 
Run this app: - - > python -m examples.itemrec.movies.movie_rec_app http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/obsolete/itemrec/movies/__init__.py ---------------------------------------------------------------------- diff --git a/examples/obsolete/itemrec/movies/__init__.py b/examples/obsolete/itemrec/movies/__init__.py deleted file mode 100644 index e69de29..0000000 http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/obsolete/itemrec/movies/app_config.py ---------------------------------------------------------------------- diff --git a/examples/obsolete/itemrec/movies/app_config.py b/examples/obsolete/itemrec/movies/app_config.py deleted file mode 100644 index 5efa7a0..0000000 --- a/examples/obsolete/itemrec/movies/app_config.py +++ /dev/null @@ -1,2 +0,0 @@ -APP_KEY = 'uJKTKyUAFNZYQQO5yxkdrSo3XIlaf9LXejI63CWE0mtZVEYF89hyVtOwpMKfXXXX' -API_URL = 'http://localhost:8000' http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/obsolete/itemrec/movies/appdata.py ---------------------------------------------------------------------- diff --git a/examples/obsolete/itemrec/movies/appdata.py b/examples/obsolete/itemrec/movies/appdata.py deleted file mode 100644 index fb3c7fd..0000000 --- a/examples/obsolete/itemrec/movies/appdata.py +++ /dev/null @@ -1,148 +0,0 @@ - -import datetime -from operator import itemgetter, attrgetter - -# can get sample data here: -# wget http://www.grouplens.org/system/files/ml-100k.zip -# app data file config -APPDATA_DIRNAME = "ml-100k" -USERS_FILENAME = "u.user" -USERS_FILE_DELIMITER = "|" -ITEMS_FILENAME = "u.item" -ITEMS_FILE_DELIMITER = "|" -RATE_ACTIONS_FILENAME = "u.data" -RATE_ACTIONS_DELIMITER = "\t" - - -class User: - def __init__(self, uid): - self.uid = uid - self.rec = [] # recommendations, list of iid - - def __str__(self): - return "User[uid=%s,rec=%s]" % (self.uid, self.rec) - -class Item: - def __init__(self, iid, name): - self.iid = iid - self.name = name - - def __str__(self): - return "Item[iid=%s,name=%s]" % (self.iid, self.name) - -class RateAction: - def __init__(self, uid, iid, rating, t): - self.uid = uid - self.iid = iid - self.rating = rating - self.t = t - - def __str__(self): - return "RateAction[uid=%s,iid=%s,rating=%s,t=%s]" % (self.uid, self.iid, self.rating, self.t) - - -class AppData: - - def __init__(self): - self._users = {} # dict of User obj - self._items = {} # dict of Item obj - self._rate_actions = [] # list of RateAction obj - - self._users_file = "%s/%s" % (APPDATA_DIRNAME, USERS_FILENAME) - self._items_file = "%s/%s" % (APPDATA_DIRNAME, ITEMS_FILENAME) - self._rate_actions_file = "%s/%s" % (APPDATA_DIRNAME, RATE_ACTIONS_FILENAME) - self.__init_users() - self.__init_items() - self.__init_rate_actions() - - def __init_users(self): - """ - uid| - """ - print "[Info] Initializing users..." - f = open(self._users_file, 'r') - for line in f: - data = line.rstrip('\r\n').split(USERS_FILE_DELIMITER) - self.add_user(User(data[0])) - f.close() - print "[Info] %s users were initialized." % len(self._users) - - def __init_items(self): - """ - iid|name - """ - print "[Info] Initializing items..." - f = open(self._items_file, 'r') - for line in f: - data = line.rstrip('\r\n').split(ITEMS_FILE_DELIMITER) - self.add_item(Item(data[0], data[1])) - f.close() - print "[Info] %s items were initialized." 
% len(self._items) - - def __init_rate_actions(self): - """ - uid|iid|rating|timestamp - """ - print "[Info] Initializing rate actions..." - f = open(self._rate_actions_file, 'r') - for line in f: - data = line.rstrip('\r\n').split(RATE_ACTIONS_DELIMITER) - t = datetime.datetime.utcfromtimestamp(int(data[3])).isoformat() - self.add_rate_action(RateAction(data[0], data[1], data[2], t)) - f.close() - print "[Info] %s rate actions were initialized." % len(self._rate_actions) - - def add_user(self, user): - self._users[user.uid] = user - - def add_item(self, item): - self._items[item.iid] = item - - def add_rate_action(self, action): - self._rate_actions.append(action) - - def get_users(self): - return self._users - - def get_items(self): - return self._items - - def get_rate_actions(self): - return self._rate_actions - - def get_user(self, uid): - """return single user - """ - if uid in self._users: - return self._users[uid] - else: - return None - - def get_item(self, iid): - """return single item - """ - if iid in self._items: - return self._items[iid] - else: - return None - - def get_top_rated_items(self, uid, n): - """get top n rated iids by this uid - """ - if uid in self._users: - actions = filter(lambda u: u.uid==uid, self._rate_actions) - top = sorted(actions, key=attrgetter('rating'), reverse=True) - topn_iids = map(lambda a: a.iid, top[:n]) - return topn_iids - else: - return None - - def get_top_rate_actions(self, uid, n): - """get top n rated actions by this uid - """ - if uid in self._users: - actions = filter(lambda u: u.uid==uid, self._rate_actions) - top = sorted(actions, key=attrgetter('rating'), reverse=True) - return top[:n] - else: - return None http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/obsolete/itemrec/movies/batch_import.py ---------------------------------------------------------------------- diff --git a/examples/obsolete/itemrec/movies/batch_import.py b/examples/obsolete/itemrec/movies/batch_import.py deleted file mode 100644 index b2e0e63..0000000 --- a/examples/obsolete/itemrec/movies/batch_import.py +++ /dev/null @@ -1,65 +0,0 @@ -from appdata import AppData -import predictionio -import sys - -from app_config import APP_KEY, API_URL - -def batch_import_task(app_data, client, all_info=False): - - print "[Info] Importing users to PredictionIO..." - count = 0 - for k, v in app_data.get_users().iteritems(): - count += 1 - if all_info: - print "[Info] Importing %s..." % v - else: - if (count % 32 == 0): - sys.stdout.write('\r[Info] %s' % count) - sys.stdout.flush() - - client.create_user(v.uid) - - sys.stdout.write('\r[Info] %s users were imported.\n' % count) - sys.stdout.flush() - - print "[Info] Importing items to PredictionIO..." - count = 0 - for k, v in app_data.get_items().iteritems(): - count += 1 - if all_info: - print "[Info] Importing %s..." % v - else: - if (count % 32 == 0): - sys.stdout.write('\r[Info] %s' % count) - sys.stdout.flush() - - client.create_item(v.iid, ("movie",)) - - sys.stdout.write('\r[Info] %s items were imported.\n' % count) - sys.stdout.flush() - - print "[Info] Importing rate actions to PredictionIO..." - count = 0 - for v in app_data.get_rate_actions(): - count += 1 - if all_info: - print "[Info] Importing %s..." 
% v - else: - if (count % 32 == 0): - sys.stdout.write('\r[Info] %s' % count) - sys.stdout.flush() - - client.identify(v.uid) - client.record_action_on_item("rate", v.iid, { "pio_rate": v.rating, "pio_t": v.t }) - - sys.stdout.write('\r[Info] %s rate actions were imported.\n' % count) - sys.stdout.flush() - - -if __name__ == '__main__': - - app_data = AppData() - client = predictionio.Client(APP_KEY, 1, API_URL) - batch_import_task(app_data, client) - client.close() - http://git-wip-us.apache.org/repos/asf/incubator-predictionio-sdk-python/blob/bc678328/examples/obsolete/itemrec/movies/movie_rec_app.py ---------------------------------------------------------------------- diff --git a/examples/obsolete/itemrec/movies/movie_rec_app.py b/examples/obsolete/itemrec/movies/movie_rec_app.py deleted file mode 100644 index 436af38..0000000 --- a/examples/obsolete/itemrec/movies/movie_rec_app.py +++ /dev/null @@ -1,152 +0,0 @@ -# To run this example app -# -# Please execute all commands from repository root. -# -# Step 1. Get sample data and unzip it. -# > wget http://www.grouplens.org/system/files/ml-100k.zip -# > unzip ml-100k.zip -# -# Step 2. Configurate examples/itemrec/movies/appdata.py -# -# Step 3. Run this app: -# python -m examples.itemrec.movies.movie_rec_app - -from appdata import AppData -import predictionio -import sys - -from app_config import APP_KEY, API_URL - -ENGINE_NAME = 'movie-rec' - -class App: - - def __init__(self): - self._app_data = AppData() - self._client = predictionio.Client(APP_KEY, 1, API_URL) - - def run(self): - state = "[Main Menu]" - - prompt = "\n"\ - "%s\n"\ - "%s\n"\ - "Please input selection:\n"\ - " 0: Quit application.\n"\ - " 1: Get Recommendations from PredictionIO.\n"\ - " 2: Display user's data." % (state, '-'*len(state)) - - while True: - print prompt - choice = raw_input().lower() - if choice == '0': - print "\nGood Bye!\n" - break - elif choice == '1': - self.recommend_task(state) - elif choice == '2': - self.display_user_task(state) - else: - print '[Error] \'%s\' is not a valid selection.' % choice - - self._client.close() - - def recommend_task(self, prev_state): - state = prev_state + " / [Get Recommendations]" - prompt = "\n"\ - "%s\n"\ - "%s\n"\ - "Please enter user id:" % (state, '-'*len(state)) - - while True: - print prompt - choice = raw_input().lower() - u = self._app_data.get_user(choice) - if u: - n = 10 - print "[Info] Getting top %s item recommendations for user %s..." % (n, u.uid) - try: - self._client.identify(u.uid) - rec = self._client.get_itemrec_topn(ENGINE_NAME, n) - u.rec = rec['pio_iids'] - self.display_items(u.rec) - except predictionio.ItemRecNotFoundError: - print "[Info] Recommendation not found" - - print "[Info] Go back to previous menu..." - break - else: - print "[Error] invalid user id %s. Go back to previous menu..." % choice - break - - def display_user_task(self, prev_state): - state = prev_state + " / [Display User]" - prompt = "\n"\ - "%s\n"\ - "%s\n"\ - "Please enter user id:" % (state, '-'*len(state)) - - while True: - print prompt - choice = raw_input().lower() - u = self._app_data.get_user(choice) - if u: - print "[Info] User %s:" % u.uid - n = 10 - topn_rate_actions = self._app_data.get_top_rate_actions(u.uid, n) - print "\n[Info] Top %s movies rated by this user:" % n - self.display_rate_actions(topn_rate_actions) - - print "\n[Info] Movies recommended to this user:" - self.display_items(u.rec) - - self.wait_for_ack() - print "\n[Info] Go back to previous menu..." 
- break - else: - print "[Error] invalid user id %s. Go back to previous menu..." % choice - break - - def display_items(self, iids, all_info=False): - """print item info for each iid in the list - """ - if iids: - for iid in iids: - item = self._app_data.get_item(iid) - if item: - if all_info: - print "[Info] %s" % item - else: - print "[Info] %s" % item.name - else: - print "[Error] Invalid item id %s" % iid - else: - print "[Info] Empty." - - def display_rate_actions(self, actions): - """print iid and rating - """ - if actions: - for a in actions: - item = self._app_data.get_item(a.iid) - if item: - print "[Info] %s, rating = %s" % (item.name, a.rating) - else: - print "[Error] Invalid item id %s" % a.iid - else: - print "[Info] Empty." - - def wait_for_ack(self): - - prompt = "\nPress enter to continue..." - print prompt - choice = raw_input().lower() - - -if __name__ == '__main__': - - print "\nWelcome To PredictionIO Python-SDK Demo App!" - print "============================================\n" - - my_app = App() - my_app.run()
