http://git-wip-us.apache.org/repos/asf/flink-web/blob/f0ac0cdb/content/docs/0.9/libs/index.html ---------------------------------------------------------------------- diff --git a/content/docs/0.9/libs/index.html b/content/docs/0.9/libs/index.html deleted file mode 100644 index 18b2c88..0000000 --- a/content/docs/0.9/libs/index.html +++ /dev/null @@ -1,205 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. 
---> -<!DOCTYPE html> - -<html lang="en"> - <head> - <meta charset="utf-8"> - <meta http-equiv="X-UA-Compatible" content="IE=edge"> - <meta name="viewport" content="width=device-width, initial-scale=1"> - <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> - - <title>Apache Flink 0.9.0 Documentation: Libraries</title> - - <link rel="shortcut icon" href="http://flink.apache.org/docs/0.9/page/favicon.ico" type="image/x-icon"> - <link rel="icon" href="http://flink.apache.org/docs/0.9/page/favicon.ico" type="image/x-icon"> - - <!-- Bootstrap --> - <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> - <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/flink.css"> - <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/syntax.css"> - <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/codetabs.css"> - - <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> - <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> - <!--[if lt IE 9]> - <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> - <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> - <![endif]--> - </head> - <body> - - - - - - - <!-- Top navbar. --> - <nav class="navbar navbar-default navbar-fixed-top"> - <div class="container"> - <!-- The logo. --> - <div class="navbar-header"> - <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - </button> - <div class="navbar-logo"> - <a href="http://flink.apache.org"><img alt="Apache Flink" src="http://flink.apache.org/docs/0.9/page/img/navbar-brand-logo.jpg"></a> - </div> - </div><!-- /.navbar-header --> - - <!-- The navigation links. 
--> - <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> - <ul class="nav navbar-nav"> - <li><a href="http://flink.apache.org/docs/0.9/index.html">Overview<span class="hidden-sm hidden-xs"> 0.9.0</span></a></li> - - <!-- Setup --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/setup" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Setup <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li><a href="http://flink.apache.org/docs/0.9/setup/building.html">Get Flink 0.9-SNAPSHOT</a></li> - - <li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Deployment</strong></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/local_setup.html" class="active">Local</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/cluster_setup.html">Cluster (Standalone)</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/yarn_setup.html">YARN</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/gce_setup.html">GCloud</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/flink_on_tez.html">Flink on Tez <span class="badge">Beta</span></a></li> - - <li class="divider"></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/config.html">Configuration</a></li> - </ul> - </li> - - <!-- Programming Guides --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/apis" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Programming Guides <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li><a href="http://flink.apache.org/docs/0.9/apis/programming_guide.html"><strong>Batch: DataSet API</strong></a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/streaming_guide.html"><strong>Streaming: DataStream API</strong> <span class="badge">Beta</span></a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/python.html">Python API <span 
class="badge">Beta</span></a></li> - - <li class="divider"></li> - <li><a href="scala_shell.html">Interactive Scala Shell</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/dataset_transformations.html">Dataset Transformations</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/best_practices.html">Best Practices</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/example_connectors.html">Connectors</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/examples.html">Examples</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/local_execution.html">Local Execution</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/cluster_execution.html">Cluster Execution</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/cli.html">Command Line Interface</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/web_client.html">Web Client</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/iterations.html">Iterations</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/java8.html">Java 8</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/hadoop_compatibility.html">Hadoop Compatability <span class="badge">Beta</span></a></li> - </ul> - </li> - - <!-- Libraries --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/libs" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Libraries <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li><a href="http://flink.apache.org/docs/0.9/libs/spargel_guide.html">Graphs: Spargel</a></li> - <li><a href="http://flink.apache.org/docs/0.9/libs/gelly_guide.html">Graphs: Gelly <span class="badge">Beta</span></a></li> - <li><a href="http://flink.apache.org/docs/0.9/libs/ml/">Machine Learning <span class="badge">Beta</span></a></li> - <li><a href="http://flink.apache.org/docs/0.9/libs/table.html">Relational: Table <span class="badge">Beta</span></a></li> - </ul> - </li> - - <!-- 
Internals --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/internals" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Internals <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li role="presentation" class="dropdown-header"><strong>Contribute</strong></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/how_to_contribute.html">How to Contribute</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/coding_guidelines.html">Coding Guidelines</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/ide_setup.html">IDE Setup</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/logging.html">Logging</a></li> - <li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Internals</strong></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/general_arch.html">Architecture & Process Model</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/types_serialization.html">Type Extraction & Serialization</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/job_scheduling.html">Jobs & Scheduling</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/add_operator.html">How-To: Add an Operator</a></li> - </ul> - </li> - </ul> - <form class="navbar-form navbar-right hidden-sm hidden-md" role="search" action="http://flink.apache.org/docs/0.9/search-results.html"> - <div class="form-group"> - <input type="text" class="form-control" name="q" placeholder="Search all pages"> - </div> - <button type="submit" class="btn btn-default">Search</button> - </form> - </div><!-- /.navbar-collapse --> - </div><!-- /.container --> - </nav> - - - - - <!-- Main content. 
--> - <div class="container"> - - -<div class="row"> - <div class="col-sm-10 col-sm-offset-1"> - <h1>Libraries</h1> - - - - </div> - - <div class="col-sm-10 col-sm-offset-1"> - <!-- Disqus thread and some vertical offset --> - <div style="margin-top: 75px; margin-bottom: 50px" id="disqus_thread"></div> - </div> -</div> - - </div><!-- /.container --> - - <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> - <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> - <!-- Include all compiled plugins (below), or include individual files as needed --> - <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> - <script src="http://flink.apache.org/docs/0.9/page/js/codetabs.js"></script> - - <!-- Google Analytics --> - <script> - (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ - (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), - m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) - })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); - - ga('create', 'UA-52545728-1', 'auto'); - ga('send', 'pageview'); - </script> - - <!-- Disqus --> - <script type="text/javascript"> - var disqus_shortname = 'stratosphere-eu'; - (function() { - var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; - dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; - (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); - })(); -</script> - </body> -</html>
http://git-wip-us.apache.org/repos/asf/flink-web/blob/f0ac0cdb/content/docs/0.9/libs/ml/als.html ---------------------------------------------------------------------- diff --git a/content/docs/0.9/libs/ml/als.html b/content/docs/0.9/libs/ml/als.html deleted file mode 100644 index 04a0c1b..0000000 --- a/content/docs/0.9/libs/ml/als.html +++ /dev/null @@ -1,401 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. 
---> -<!DOCTYPE html> - -<html lang="en"> - <head> - <meta charset="utf-8"> - <meta http-equiv="X-UA-Compatible" content="IE=edge"> - <meta name="viewport" content="width=device-width, initial-scale=1"> - <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> - - <title>Apache Flink 0.9.0 Documentation: FlinkML - Alternating Least Squares</title> - - <link rel="shortcut icon" href="http://flink.apache.org/docs/0.9/page/favicon.ico" type="image/x-icon"> - <link rel="icon" href="http://flink.apache.org/docs/0.9/page/favicon.ico" type="image/x-icon"> - - <!-- Bootstrap --> - <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> - <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/flink.css"> - <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/syntax.css"> - <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/codetabs.css"> - - <script type="text/x-mathjax-config"> - MathJax.Hub.Config({ - tex2jax: { - inlineMath: [['$','$'], ['\\(','\\)']] }, - TeX: { - equationNumbers: { autoNumber: "AMS" } } - }); - </script> - <script type="text/javascript" - src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"> - </script> - - <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> - <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> - <!--[if lt IE 9]> - <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> - <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> - <![endif]--> - </head> - <body> - - - - - - - <!-- Top navbar. --> - <nav class="navbar navbar-default navbar-fixed-top"> - <div class="container"> - <!-- The logo. 
--> - <div class="navbar-header"> - <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - </button> - <div class="navbar-logo"> - <a href="http://flink.apache.org"><img alt="Apache Flink" src="http://flink.apache.org/docs/0.9/page/img/navbar-brand-logo.jpg"></a> - </div> - </div><!-- /.navbar-header --> - - <!-- The navigation links. --> - <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> - <ul class="nav navbar-nav"> - <li><a href="http://flink.apache.org/docs/0.9/index.html">Overview<span class="hidden-sm hidden-xs"> 0.9.0</span></a></li> - - <!-- Setup --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/setup" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Setup <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li><a href="http://flink.apache.org/docs/0.9/setup/building.html">Get Flink 0.9-SNAPSHOT</a></li> - - <li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Deployment</strong></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/local_setup.html" class="active">Local</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/cluster_setup.html">Cluster (Standalone)</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/yarn_setup.html">YARN</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/gce_setup.html">GCloud</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/flink_on_tez.html">Flink on Tez <span class="badge">Beta</span></a></li> - - <li class="divider"></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/config.html">Configuration</a></li> - </ul> - </li> - - <!-- Programming Guides --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/apis" class="dropdown-toggle" 
data-toggle="dropdown" role="button" aria-expanded="false">Programming Guides <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li><a href="http://flink.apache.org/docs/0.9/apis/programming_guide.html"><strong>Batch: DataSet API</strong></a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/streaming_guide.html"><strong>Streaming: DataStream API</strong> <span class="badge">Beta</span></a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/python.html">Python API <span class="badge">Beta</span></a></li> - - <li class="divider"></li> - <li><a href="scala_shell.html">Interactive Scala Shell</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/dataset_transformations.html">Dataset Transformations</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/best_practices.html">Best Practices</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/example_connectors.html">Connectors</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/examples.html">Examples</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/local_execution.html">Local Execution</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/cluster_execution.html">Cluster Execution</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/cli.html">Command Line Interface</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/web_client.html">Web Client</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/iterations.html">Iterations</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/java8.html">Java 8</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/hadoop_compatibility.html">Hadoop Compatability <span class="badge">Beta</span></a></li> - </ul> - </li> - - <!-- Libraries --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/libs" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Libraries <span class="caret"></span></a> - <ul 
class="dropdown-menu" role="menu"> - <li><a href="http://flink.apache.org/docs/0.9/libs/spargel_guide.html">Graphs: Spargel</a></li> - <li><a href="http://flink.apache.org/docs/0.9/libs/gelly_guide.html">Graphs: Gelly <span class="badge">Beta</span></a></li> - <li><a href="http://flink.apache.org/docs/0.9/libs/ml/">Machine Learning <span class="badge">Beta</span></a></li> - <li><a href="http://flink.apache.org/docs/0.9/libs/table.html">Relational: Table <span class="badge">Beta</span></a></li> - </ul> - </li> - - <!-- Internals --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/internals" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Internals <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li role="presentation" class="dropdown-header"><strong>Contribute</strong></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/how_to_contribute.html">How to Contribute</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/coding_guidelines.html">Coding Guidelines</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/ide_setup.html">IDE Setup</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/logging.html">Logging</a></li> - <li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Internals</strong></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/general_arch.html">Architecture & Process Model</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/types_serialization.html">Type Extraction & Serialization</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/job_scheduling.html">Jobs & Scheduling</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/add_operator.html">How-To: Add an Operator</a></li> - </ul> - </li> - </ul> - <form class="navbar-form navbar-right hidden-sm hidden-md" role="search" action="http://flink.apache.org/docs/0.9/search-results.html"> - 
<div class="form-group"> - <input type="text" class="form-control" name="q" placeholder="Search all pages"> - </div> - <button type="submit" class="btn btn-default">Search</button> - </form> - </div><!-- /.navbar-collapse --> - </div><!-- /.container --> - </nav> - - - - - -<!--Some of the Latex math notation has been adapted from Apache Spark MLlib's documentation--> -$$ -\newcommand{\R}{\mathbb{R}} -\newcommand{\E}{\mathbb{E}} -\newcommand{\x}{\mathbf{x}} -\newcommand{\y}{\mathbf{y}} -\newcommand{\wv}{\mathbf{w}} -\newcommand{\av}{\mathbf{\alpha}} -\newcommand{\bv}{\mathbf{b}} -\newcommand{\N}{\mathbb{N}} -\newcommand{\id}{\mathbf{I}} -\newcommand{\ind}{\mathbf{1}} -\newcommand{\0}{\mathbf{0}} -\newcommand{\unit}{\mathbf{e}} -\newcommand{\one}{\mathbf{1}} -\newcommand{\zero}{\mathbf{0}} -\newcommand\rfrac[2]{^{#1}\!/_{#2}} -\newcommand{\norm}[1]{\left\lVert#1\right\rVert} -$$ - - - <!-- Main content. --> - <div class="container"> - - -<div class="row"> - <div class="col-sm-10 col-sm-offset-1"> - <h1><a href="../ml">FlinkML</a> - Alternating Least Squares</h1> - - - -<ul id="markdown-toc"> - <li><a href="#description" id="markdown-toc-description">Description</a></li> - <li><a href="#operations" id="markdown-toc-operations">Operations</a> <ul> - <li><a href="#fit" id="markdown-toc-fit">Fit</a></li> - <li><a href="#predict" id="markdown-toc-predict">Predict</a></li> - </ul> - </li> - <li><a href="#parameters" id="markdown-toc-parameters">Parameters</a></li> - <li><a href="#examples" id="markdown-toc-examples">Examples</a></li> -</ul> - -<h2 id="description">Description</h2> - -<p>The alternating least squares (ALS) algorithm factorizes a given matrix $R$ into two factors $U$ and $V$ such that $R \approx U^TV$. -The unknown row dimension is given as a parameter to the algorithm and is called latent factors. -Since matrix factorization can be used in the context of recommendation, the matrices $U$ and $V$ can be called user and item matrix, respectively. 
-The $i$th column of the user matrix is denoted by $u_i$ and the $i$th column of the item matrix is $v_i$. -The matrix $R$ can be called the ratings matrix with <script type="math/tex">(R)_{i,j} = r_{i,j}</script>.</p> - -<p>In order to find the user and item matrix, the following problem is solved:</p> - -<script type="math/tex; mode=display">\arg\min_{U,V} \sum_{\{i,j\mid r_{i,j} \not= 0\}} \left(r_{i,j} - u_{i}^Tv_{j}\right)^2 + -\lambda \left(\sum_{i} n_{u_i} \left\lVert u_i \right\rVert^2 + \sum_{j} n_{v_j} \left\lVert v_j \right\rVert^2 \right)</script> - -<p>with $\lambda$ being the regularization factor, <script type="math/tex">n_{u_i}</script> being the number of items the user $i$ has rated and <script type="math/tex">n_{v_j}</script> being the number of times the item $j$ has been rated. -This regularization scheme to avoid overfitting is called weighted-$\lambda$-regularization. -Details can be found in the work of <a href="http://dx.doi.org/10.1007/978-3-540-68880-8_32">Zhou et al.</a>.</p> - -<p>By fixing one of the matrices $U$ or $V$, we obtain a quadratic form which can be solved directly. -The solution of the modified problem is guaranteed to monotonically decrease the overall cost function. -By applying this step alternately to the matrices $U$ and $V$, we can iteratively improve the matrix factorization.</p> - -<p>The matrix $R$ is given in its sparse representation as a tuple of $(i, j, r)$ where $i$ denotes the row index, $j$ the column index and $r$ is the matrix value at position $(i,j)$.</p> - -<h2 id="operations">Operations</h2> - -<p><code>ALS</code> is a <code>Predictor</code>. 
-As such, it supports the <code>fit</code> and <code>predict</code> operation.</p> - -<h3 id="fit">Fit</h3> - -<p>ALS is trained on the sparse representation of the rating matrix:</p> - -<ul> - <li><code>fit: DataSet[(Int, Int, Double)] => Unit</code></li> -</ul> - -<h3 id="predict">Predict</h3> - -<p>ALS predicts for each tuple of row and column index the rating:</p> - -<ul> - <li><code>predict: DataSet[(Int, Int)] => DataSet[(Int, Int, Double)]</code></li> -</ul> - -<h2 id="parameters">Parameters</h2> - -<p>The alternating least squares implementation can be controlled by the following parameters:</p> - -<table class="table table-bordered"> - <thead> - <tr> - <th class="text-left" style="width: 20%">Parameters</th> - <th class="text-center">Description</th> - </tr> - </thead> - - <tbody> - <tr> - <td><strong>NumFactors</strong></td> - <td> - <p> - The number of latent factors to use for the underlying model. - It is equivalent to the dimension of the calculated user and item vectors. - (Default value: <strong>10</strong>) - </p> - </td> - </tr> - <tr> - <td><strong>Lambda</strong></td> - <td> - <p> - Regularization factor. Tune this value in order to avoid overfitting or poor performance due to strong generalization. - (Default value: <strong>1</strong>) - </p> - </td> - </tr> - <tr> - <td><strong>Iterations</strong></td> - <td> - <p> - The maximum number of iterations. - (Default value: <strong>10</strong>) - </p> - </td> - </tr> - <tr> - <td><strong>Blocks</strong></td> - <td> - <p> - The number of blocks into which the user and item matrix are grouped. - The fewer blocks one uses, the less data is sent redundantly. - However, bigger blocks entail bigger update messages which have to be stored on the heap. - If the algorithm fails because of an OutOfMemoryException, then try to increase the number of blocks. 
- (Default value: <strong>None</strong>) - </p> - </td> - </tr> - <tr> - <td><strong>Seed</strong></td> - <td> - <p> - Random seed used to generate the initial item matrix for the algorithm. - (Default value: <strong>0</strong>) - </p> - </td> - </tr> - <tr> - <td><strong>TemporaryPath</strong></td> - <td> - <p> - Path to a temporary directory into which intermediate results are stored. - If this value is set, then the algorithm is split into two preprocessing steps, the ALS iteration and a post-processing step which calculates a last ALS half-step. - The preprocessing steps calculate the <code>OutBlockInformation</code> and <code>InBlockInformation</code> for the given rating matrix. - The results of the individual steps are stored in the specified directory. - By splitting the algorithm into multiple smaller steps, Flink does not have to split the available memory amongst too many operators. - This allows the system to process bigger individual messages and improves the overall performance. 
- (Default value: <strong>None</strong>) - </p> - </td> - </tr> - </tbody> - </table> - -<h2 id="examples">Examples</h2> - -<div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="c1">// Read input data set from a csv file</span> -<span class="k">val</span> <span class="n">inputDS</span><span class="k">:</span> <span class="kt">DataSet</span><span class="o">[(</span><span class="kt">Int</span>, <span class="kt">Int</span>, <span class="kt">Double</span><span class="o">)]</span> <span class="k">=</span> <span class="n">env</span><span class="o">.</span><span class="n">readCsvFile</span><span class="o">[(</span><span class="kt">Int</span>, <span class="kt">Int</span>, <span class="kt">Double</span><span class="o">)](</span> - <span class="n">pathToTrainingFile</span><span class="o">)</span> - -<span class="c1">// Setup the ALS learner</span> -<span class="k">val</span> <span class="n">als</span> <span class="k">=</span> <span class="nc">ALS</span><span class="o">()</span> -<span class="o">.</span><span class="n">setIterations</span><span class="o">(</span><span class="mi">10</span><span class="o">)</span> -<span class="o">.</span><span class="n">setNumFactors</span><span class="o">(</span><span class="mi">10</span><span class="o">)</span> -<span class="o">.</span><span class="n">setBlocks</span><span class="o">(</span><span class="mi">100</span><span class="o">)</span> -<span class="o">.</span><span class="n">setTemporaryPath</span><span class="o">(</span><span class="s">"hdfs://tempPath"</span><span class="o">)</span> - -<span class="c1">// Set the other parameters via a parameter map</span> -<span class="k">val</span> <span class="n">parameters</span> <span class="k">=</span> <span class="nc">ParameterMap</span><span class="o">()</span> -<span class="o">.</span><span class="n">add</span><span class="o">(</span><span class="nc">ALS</span><span class="o">.</span><span class="nc">Lambda</span><span class="o">,</span> <span 
class="mf">0.9</span><span class="o">)</span> -<span class="o">.</span><span class="n">add</span><span class="o">(</span><span class="nc">ALS</span><span class="o">.</span><span class="nc">Seed</span><span class="o">,</span> <span class="mi">42L</span><span class="o">)</span> - -<span class="c1">// Calculate the factorization</span> -<span class="n">als</span><span class="o">.</span><span class="n">fit</span><span class="o">(</span><span class="n">inputDS</span><span class="o">,</span> <span class="n">parameters</span><span class="o">)</span> - -<span class="c1">// Read the testing data set from a csv file</span> -<span class="k">val</span> <span class="n">testingDS</span><span class="k">:</span> <span class="kt">DataSet</span><span class="o">[(</span><span class="kt">Int</span>, <span class="kt">Int</span><span class="o">)]</span> <span class="k">=</span> <span class="n">env</span><span class="o">.</span><span class="n">readCsvFile</span><span class="o">[(</span><span class="kt">Int</span>, <span class="kt">Int</span><span class="o">)](</span><span class="n">pathToData</span><span class="o">)</span> - -<span class="c1">// Calculate the ratings according to the matrix factorization</span> -<span class="k">val</span> <span class="n">predictedRatings</span> <span class="k">=</span> <span class="n">als</span><span class="o">.</span><span class="n">predict</span><span class="o">(</span><span class="n">testingDS</span><span class="o">)</span></code></pre></div> - - - </div> - - <div class="col-sm-10 col-sm-offset-1"> - <!-- Disqus thread and some vertical offset --> - <div style="margin-top: 75px; margin-bottom: 50px" id="disqus_thread"></div> - </div> -</div> - - </div><!-- /.container --> - - <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> - <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> - <!-- Include all compiled plugins (below), or include individual files as needed --> - <script 
src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> - <script src="http://flink.apache.org/docs/0.9/page/js/codetabs.js"></script> - - <!-- Google Analytics --> - <script> - (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ - (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), - m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) - })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); - - ga('create', 'UA-52545728-1', 'auto'); - ga('send', 'pageview'); - </script> - - <!-- Disqus --> - <script type="text/javascript"> - var disqus_shortname = 'stratosphere-eu'; - (function() { - var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; - dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; - (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); - })(); -</script> - </body> -</html> http://git-wip-us.apache.org/repos/asf/flink-web/blob/f0ac0cdb/content/docs/0.9/libs/ml/contribution_guide.html ---------------------------------------------------------------------- diff --git a/content/docs/0.9/libs/ml/contribution_guide.html b/content/docs/0.9/libs/ml/contribution_guide.html deleted file mode 100644 index 2f99941..0000000 --- a/content/docs/0.9/libs/ml/contribution_guide.html +++ /dev/null @@ -1,325 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. 
You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. ---> -<!DOCTYPE html> - -<html lang="en"> - <head> - <meta charset="utf-8"> - <meta http-equiv="X-UA-Compatible" content="IE=edge"> - <meta name="viewport" content="width=device-width, initial-scale=1"> - <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> - - <title>Apache Flink 0.9.0 Documentation: FlinkML - How to Contribute</title> - - <link rel="shortcut icon" href="http://flink.apache.org/docs/0.9/page/favicon.ico" type="image/x-icon"> - <link rel="icon" href="http://flink.apache.org/docs/0.9/page/favicon.ico" type="image/x-icon"> - - <!-- Bootstrap --> - <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> - <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/flink.css"> - <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/syntax.css"> - <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/codetabs.css"> - - <script type="text/x-mathjax-config"> - MathJax.Hub.Config({ - tex2jax: { - inlineMath: [['$','$'], ['\\(','\\)']] }, - TeX: { - equationNumbers: { autoNumber: "AMS" } } - }); - </script> - <script type="text/javascript" - src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"> - </script> - - <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> - <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> - <!--[if lt IE 9]> - <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> - <script 
src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> - <![endif]--> - </head> - <body> - - - - - - - <!-- Top navbar. --> - <nav class="navbar navbar-default navbar-fixed-top"> - <div class="container"> - <!-- The logo. --> - <div class="navbar-header"> - <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - </button> - <div class="navbar-logo"> - <a href="http://flink.apache.org"><img alt="Apache Flink" src="http://flink.apache.org/docs/0.9/page/img/navbar-brand-logo.jpg"></a> - </div> - </div><!-- /.navbar-header --> - - <!-- The navigation links. --> - <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> - <ul class="nav navbar-nav"> - <li><a href="http://flink.apache.org/docs/0.9/index.html">Overview<span class="hidden-sm hidden-xs"> 0.9.0</span></a></li> - - <!-- Setup --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/setup" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Setup <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li><a href="http://flink.apache.org/docs/0.9/setup/building.html">Get Flink 0.9-SNAPSHOT</a></li> - - <li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Deployment</strong></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/local_setup.html" class="active">Local</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/cluster_setup.html">Cluster (Standalone)</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/yarn_setup.html">YARN</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/gce_setup.html">GCloud</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/flink_on_tez.html">Flink on Tez <span class="badge">Beta</span></a></li> - - <li class="divider"></li> - <li><a 
href="http://flink.apache.org/docs/0.9/setup/config.html">Configuration</a></li> - </ul> - </li> - - <!-- Programming Guides --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/apis" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Programming Guides <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li><a href="http://flink.apache.org/docs/0.9/apis/programming_guide.html"><strong>Batch: DataSet API</strong></a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/streaming_guide.html"><strong>Streaming: DataStream API</strong> <span class="badge">Beta</span></a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/python.html">Python API <span class="badge">Beta</span></a></li> - - <li class="divider"></li> - <li><a href="scala_shell.html">Interactive Scala Shell</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/dataset_transformations.html">Dataset Transformations</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/best_practices.html">Best Practices</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/example_connectors.html">Connectors</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/examples.html">Examples</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/local_execution.html">Local Execution</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/cluster_execution.html">Cluster Execution</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/cli.html">Command Line Interface</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/web_client.html">Web Client</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/iterations.html">Iterations</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/java8.html">Java 8</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/hadoop_compatibility.html">Hadoop Compatibility <span class="badge">Beta</span></a></li> - </ul> - </li> - - <!-- 
Libraries --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/libs" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Libraries <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li><a href="http://flink.apache.org/docs/0.9/libs/spargel_guide.html">Graphs: Spargel</a></li> - <li><a href="http://flink.apache.org/docs/0.9/libs/gelly_guide.html">Graphs: Gelly <span class="badge">Beta</span></a></li> - <li><a href="http://flink.apache.org/docs/0.9/libs/ml/">Machine Learning <span class="badge">Beta</span></a></li> - <li><a href="http://flink.apache.org/docs/0.9/libs/table.html">Relational: Table <span class="badge">Beta</span></a></li> - </ul> - </li> - - <!-- Internals --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/internals" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Internals <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li role="presentation" class="dropdown-header"><strong>Contribute</strong></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/how_to_contribute.html">How to Contribute</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/coding_guidelines.html">Coding Guidelines</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/ide_setup.html">IDE Setup</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/logging.html">Logging</a></li> - <li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Internals</strong></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/general_arch.html">Architecture & Process Model</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/types_serialization.html">Type Extraction & Serialization</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/job_scheduling.html">Jobs & Scheduling</a></li> - <li><a 
href="http://flink.apache.org/docs/0.9/internals/add_operator.html">How-To: Add an Operator</a></li> - </ul> - </li> - </ul> - <form class="navbar-form navbar-right hidden-sm hidden-md" role="search" action="http://flink.apache.org/docs/0.9/search-results.html"> - <div class="form-group"> - <input type="text" class="form-control" name="q" placeholder="Search all pages"> - </div> - <button type="submit" class="btn btn-default">Search</button> - </form> - </div><!-- /.navbar-collapse --> - </div><!-- /.container --> - </nav> - - - - - -<!--Some of the Latex math notation has been adapted from Apache Spark MLlib's documentation--> -$$ -\newcommand{\R}{\mathbb{R}} -\newcommand{\E}{\mathbb{E}} -\newcommand{\x}{\mathbf{x}} -\newcommand{\y}{\mathbf{y}} -\newcommand{\wv}{\mathbf{w}} -\newcommand{\av}{\mathbf{\alpha}} -\newcommand{\bv}{\mathbf{b}} -\newcommand{\N}{\mathbb{N}} -\newcommand{\id}{\mathbf{I}} -\newcommand{\ind}{\mathbf{1}} -\newcommand{\0}{\mathbf{0}} -\newcommand{\unit}{\mathbf{e}} -\newcommand{\one}{\mathbf{1}} -\newcommand{\zero}{\mathbf{0}} -\newcommand\rfrac[2]{^{#1}\!/_{#2}} -\newcommand{\norm}[1]{\left\lVert#1\right\rVert} -$$ - - - <!-- Main content. --> - <div class="container"> - - -<div class="row"> - <div class="col-sm-10 col-sm-offset-1"> - <h1><a href="../ml">FlinkML</a> - How to Contribute</h1> - - - -<p>The Flink community highly appreciates all sorts of contributions to FlinkML. -FlinkML offers people interested in machine learning the opportunity to work on a highly active open source project which makes scalable ML a reality. 
-The following document describes how to contribute to FlinkML.</p> - -<ul id="markdown-toc"> - <li><a href="#getting-started" id="markdown-toc-getting-started">Getting Started</a></li> - <li><a href="#pick-a-topic" id="markdown-toc-pick-a-topic">Pick a Topic</a></li> - <li><a href="#testing" id="markdown-toc-testing">Testing</a></li> - <li><a href="#documentation" id="markdown-toc-documentation">Documentation</a></li> - <li><a href="#contributing" id="markdown-toc-contributing">Contributing</a></li> -</ul> - -<h2 id="getting-started">Getting Started</h2> - -<p>In order to get started first read Flink's <a href="http://flink.apache.org/how-to-contribute.html">contribution guide</a>. -Everything from this guide also applies to FlinkML.</p> - -<h2 id="pick-a-topic">Pick a Topic</h2> - -<p>If you are looking for some new ideas you should first look into our <a href="vision_roadmap.html#Roadmap">roadmap</a>, then you should check out the list of <a href="https://issues.apache.org/jira/issues/?jql=component%20%3D%20%22Machine%20Learning%20Library%22%20AND%20project%20%3D%20FLINK%20AND%20resolution%20%3D%20Unresolved%20ORDER%20BY%20priority%20DESC">unresolved issues on JIRA</a>. -Once you decide to contribute to one of these issues, you should take ownership of it and track your progress with this issue. -That way, the other contributors know the state of the different issues and redundant work is avoided.</p> - -<p>If you already know what you want to contribute to FlinkML all the better. -It is still advisable to create a JIRA issue for your idea to tell the Flink community what you want to do, though.</p> - -<h2 id="testing">Testing</h2> - -<p>New contributions should come with tests to verify the correct behavior of the algorithm. -The tests help to maintain the algorithm's correctness throughout code changes, e.g. 
refactorings.</p> - -<p>We distinguish between unit tests, which are executed during Maven's test phase, and integration tests, which are executed during Maven's verify phase. -Maven automatically makes this distinction by using the following naming rules: -All test cases whose class name ends with a suffix fulfilling the regular expression <code>(IT|Integration)(Test|Suite|Case)</code>, are considered integration tests. -The rest are considered unit tests and should only test behavior which is local to the component under test.</p> - -<p>An integration test is a test which requires the full Flink system to be started. -In order to do that properly, all integration test cases have to mix in the trait <code>FlinkTestBase</code>. -This trait will set the right <code>ExecutionEnvironment</code> so that the test will be executed on a special <code>FlinkMiniCluster</code> designated for testing purposes. -Thus, an integration test could look like the following:</p> - -<div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="k">class</span> <span class="nc">ExampleITSuite</span> <span class="k">extends</span> <span class="nc">FlatSpec</span> <span class="k">with</span> <span class="nc">FlinkTestBase</span> <span class="o">{</span> - <span class="n">behavior</span> <span class="n">of</span> <span class="s">"An example algorithm"</span> - - <span class="n">it</span> <span class="n">should</span> <span class="s">"do something"</span> <span class="n">in</span> <span class="o">{</span> - <span class="o">...</span> - <span class="o">}</span> -<span class="o">}</span></code></pre></div> - -<p>The test style does not have to be <code>FlatSpec</code> but can be any other scalatest <code>Suite</code> subclass. 
-See <a href="http://scalatest.org/user_guide/selecting_a_style">ScalaTest testing styles</a> for more information.</p> - -<h2 id="documentation">Documentation</h2> - -<p>When contributing new algorithms, it is required to add code comments describing the way the algorithm works and its parameters with which the user can control its behavior. -Additionally, we would like to encourage contributors to add this information to the online documentation. -The online documentation for FlinkML's components can be found in the directory <code>docs/libs/ml</code>.</p> - -<p>Every new algorithm is described by a single markdown file. -This file should contain at least the following points:</p> - -<ol> - <li>What does the algorithm do</li> - <li>How does the algorithm work (or reference to description)</li> - <li>Parameter description with default values</li> - <li>Code snippet showing how the algorithm is used</li> -</ol> - -<p>In order to use latex syntax in the markdown file, you have to include <code>mathjax: include</code> in the YAML front matter.</p> - -<div class="highlight"><pre><code class="language-java" data-lang="java"><span class="o">---</span> -<span class="nl">mathjax:</span> <span class="n">include</span> -<span class="nl">htmlTitle:</span> <span class="n">FlinkML</span> <span class="o">-</span> <span class="n">Example</span> <span class="n">title</span> -<span class="nl">title:</span> <span class="o">&lt;</span><span class="n">a</span> <span class="n">href</span><span class="o">=</span><span class="s">"../ml"</span><span class="o">&gt;</span><span class="n">FlinkML</span><span class="o">&lt;/</span><span class="n">a</span><span class="o">&gt;</span> <span class="o">-</span> <span class="n">Example</span> <span class="n">title</span> -<span class="o">---</span></code></pre></div> - -<p>In order to use displayed mathematics, you have to put your latex code in <code>$$ ... $$</code>. -For in-line mathematics, use <code>$ ... $</code>. 
-Additionally some predefined latex commands are included into the scope of your markdown file. -See <code>docs/_include/latex_commands.html</code> for the complete list of predefined latex commands.</p> - -<h2 id="contributing">Contributing</h2> - -<p>Once you have implemented the algorithm with adequate test coverage and added documentation, you are ready to open a pull request. -Details of how to open a pull request can be found <a href="http://flink.apache.org/how-to-contribute.html#contributing-code--documentation">here</a>.</p> - - </div> - - <div class="col-sm-10 col-sm-offset-1"> - <!-- Disqus thread and some vertical offset --> - <div style="margin-top: 75px; margin-bottom: 50px" id="disqus_thread"></div> - </div> -</div> - - </div><!-- /.container --> - - <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> - <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> - <!-- Include all compiled plugins (below), or include individual files as needed --> - <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> - <script src="http://flink.apache.org/docs/0.9/page/js/codetabs.js"></script> - - <!-- Google Analytics --> - <script> - (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ - (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), - m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) - })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); - - ga('create', 'UA-52545728-1', 'auto'); - ga('send', 'pageview'); - </script> - - <!-- Disqus --> - <script type="text/javascript"> - var disqus_shortname = 'stratosphere-eu'; - (function() { - var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; - dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; - (document.getElementsByTagName('head')[0] || 
document.getElementsByTagName('body')[0]).appendChild(dsq); - })(); -</script> - </body> -</html> http://git-wip-us.apache.org/repos/asf/flink-web/blob/f0ac0cdb/content/docs/0.9/libs/ml/distance_metrics.html ---------------------------------------------------------------------- diff --git a/content/docs/0.9/libs/ml/distance_metrics.html b/content/docs/0.9/libs/ml/distance_metrics.html deleted file mode 100644 index 9272077..0000000 --- a/content/docs/0.9/libs/ml/distance_metrics.html +++ /dev/null @@ -1,325 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. 
---> -<!DOCTYPE html> - -<html lang="en"> - <head> - <meta charset="utf-8"> - <meta http-equiv="X-UA-Compatible" content="IE=edge"> - <meta name="viewport" content="width=device-width, initial-scale=1"> - <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> - - <title>Apache Flink 0.9.0 Documentation: FlinkML - Distance Metrics</title> - - <link rel="shortcut icon" href="http://flink.apache.org/docs/0.9/page/favicon.ico" type="image/x-icon"> - <link rel="icon" href="http://flink.apache.org/docs/0.9/page/favicon.ico" type="image/x-icon"> - - <!-- Bootstrap --> - <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> - <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/flink.css"> - <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/syntax.css"> - <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/codetabs.css"> - - <script type="text/x-mathjax-config"> - MathJax.Hub.Config({ - tex2jax: { - inlineMath: [['$','$'], ['\\(','\\)']] }, - TeX: { - equationNumbers: { autoNumber: "AMS" } } - }); - </script> - <script type="text/javascript" - src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"> - </script> - - <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> - <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> - <!--[if lt IE 9]> - <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> - <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> - <![endif]--> - </head> - <body> - - - - - - - <!-- Top navbar. --> - <nav class="navbar navbar-default navbar-fixed-top"> - <div class="container"> - <!-- The logo. 
--> - <div class="navbar-header"> - <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - </button> - <div class="navbar-logo"> - <a href="http://flink.apache.org"><img alt="Apache Flink" src="http://flink.apache.org/docs/0.9/page/img/navbar-brand-logo.jpg"></a> - </div> - </div><!-- /.navbar-header --> - - <!-- The navigation links. --> - <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> - <ul class="nav navbar-nav"> - <li><a href="http://flink.apache.org/docs/0.9/index.html">Overview<span class="hidden-sm hidden-xs"> 0.9.0</span></a></li> - - <!-- Setup --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/setup" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Setup <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li><a href="http://flink.apache.org/docs/0.9/setup/building.html">Get Flink 0.9-SNAPSHOT</a></li> - - <li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Deployment</strong></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/local_setup.html" class="active">Local</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/cluster_setup.html">Cluster (Standalone)</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/yarn_setup.html">YARN</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/gce_setup.html">GCloud</a></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/flink_on_tez.html">Flink on Tez <span class="badge">Beta</span></a></li> - - <li class="divider"></li> - <li><a href="http://flink.apache.org/docs/0.9/setup/config.html">Configuration</a></li> - </ul> - </li> - - <!-- Programming Guides --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/apis" class="dropdown-toggle" 
data-toggle="dropdown" role="button" aria-expanded="false">Programming Guides <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li><a href="http://flink.apache.org/docs/0.9/apis/programming_guide.html"><strong>Batch: DataSet API</strong></a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/streaming_guide.html"><strong>Streaming: DataStream API</strong> <span class="badge">Beta</span></a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/python.html">Python API <span class="badge">Beta</span></a></li> - - <li class="divider"></li> - <li><a href="scala_shell.html">Interactive Scala Shell</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/dataset_transformations.html">Dataset Transformations</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/best_practices.html">Best Practices</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/example_connectors.html">Connectors</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/examples.html">Examples</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/local_execution.html">Local Execution</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/cluster_execution.html">Cluster Execution</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/cli.html">Command Line Interface</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/web_client.html">Web Client</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/iterations.html">Iterations</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/java8.html">Java 8</a></li> - <li><a href="http://flink.apache.org/docs/0.9/apis/hadoop_compatibility.html">Hadoop Compatibility <span class="badge">Beta</span></a></li> - </ul> - </li> - - <!-- Libraries --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/libs" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Libraries <span class="caret"></span></a> - <ul 
class="dropdown-menu" role="menu"> - <li><a href="http://flink.apache.org/docs/0.9/libs/spargel_guide.html">Graphs: Spargel</a></li> - <li><a href="http://flink.apache.org/docs/0.9/libs/gelly_guide.html">Graphs: Gelly <span class="badge">Beta</span></a></li> - <li><a href="http://flink.apache.org/docs/0.9/libs/ml/">Machine Learning <span class="badge">Beta</span></a></li> - <li><a href="http://flink.apache.org/docs/0.9/libs/table.html">Relational: Table <span class="badge">Beta</span></a></li> - </ul> - </li> - - <!-- Internals --> - <li class="dropdown"> - <a href="http://flink.apache.org/docs/0.9/internals" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Internals <span class="caret"></span></a> - <ul class="dropdown-menu" role="menu"> - <li role="presentation" class="dropdown-header"><strong>Contribute</strong></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/how_to_contribute.html">How to Contribute</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/coding_guidelines.html">Coding Guidelines</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/ide_setup.html">IDE Setup</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/logging.html">Logging</a></li> - <li class="divider"></li> - <li role="presentation" class="dropdown-header"><strong>Internals</strong></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/general_arch.html">Architecture & Process Model</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/types_serialization.html">Type Extraction & Serialization</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/job_scheduling.html">Jobs & Scheduling</a></li> - <li><a href="http://flink.apache.org/docs/0.9/internals/add_operator.html">How-To: Add an Operator</a></li> - </ul> - </li> - </ul> - <form class="navbar-form navbar-right hidden-sm hidden-md" role="search" action="http://flink.apache.org/docs/0.9/search-results.html"> - 
<div class="form-group"> - <input type="text" class="form-control" name="q" placeholder="Search all pages"> - </div> - <button type="submit" class="btn btn-default">Search</button> - </form> - </div><!-- /.navbar-collapse --> - </div><!-- /.container --> - </nav> - - - - - -<!--Some of the Latex math notation has been adapted from Apache Spark MLlib's documentation--> -$$ -\newcommand{\R}{\mathbb{R}} -\newcommand{\E}{\mathbb{E}} -\newcommand{\x}{\mathbf{x}} -\newcommand{\y}{\mathbf{y}} -\newcommand{\wv}{\mathbf{w}} -\newcommand{\av}{\mathbf{\alpha}} -\newcommand{\bv}{\mathbf{b}} -\newcommand{\N}{\mathbb{N}} -\newcommand{\id}{\mathbf{I}} -\newcommand{\ind}{\mathbf{1}} -\newcommand{\0}{\mathbf{0}} -\newcommand{\unit}{\mathbf{e}} -\newcommand{\one}{\mathbf{1}} -\newcommand{\zero}{\mathbf{0}} -\newcommand\rfrac[2]{^{#1}\!/_{#2}} -\newcommand{\norm}[1]{\left\lVert#1\right\rVert} -$$ - - - <!-- Main content. --> - <div class="container"> - - -<div class="row"> - <div class="col-sm-10 col-sm-offset-1"> - <h1><a href="../ml">FlinkML</a> - Distance Metrics</h1> - - - -<ul id="markdown-toc"> - <li><a href="#description" id="markdown-toc-description">Description</a></li> - <li><a href="#built-in-implementations" id="markdown-toc-built-in-implementations">Built-in Implementations</a></li> - <li><a href="#custom-implementation" id="markdown-toc-custom-implementation">Custom Implementation</a></li> -</ul> - -<h2 id="description">Description</h2> - -<p>Different metrics of distance are convenient for different types of analysis. Flink ML provides -built-in implementations for many standard distance metrics. 
You can create custom -distance metrics by implementing the <code>DistanceMetric</code> trait.</p> - -<h2 id="built-in-implementations">Built-in Implementations</h2> - -<p>Currently, FlinkML supports the following metrics:</p> - -<table class="table table-bordered"> - <thead> - <tr> - <th class="text-left" style="width: 20%">Metric</th> - <th class="text-center">Description</th> - </tr> - </thead> - - <tbody> - <tr> - <td><strong>Euclidean Distance</strong></td> - <td> - $$d(\x, \y) = \sqrt{\sum_{i=1}^n \left(x_i - y_i \right)^2}$$ - </td> - </tr> - <tr> - <td><strong>Squared Euclidean Distance</strong></td> - <td> - $$d(\x, \y) = \sum_{i=1}^n \left(x_i - y_i \right)^2$$ - </td> - </tr> - <tr> - <td><strong>Cosine Similarity</strong></td> - <td> - $$d(\x, \y) = 1 - \frac{\x^T \y}{\Vert \x \Vert \Vert \y \Vert}$$ - </td> - </tr> - <tr> - <td><strong>Chebyshev Distance</strong></td> - <td> - $$d(\x, \y) = \max_{i}\left(\left \vert x_i - y_i \right\vert \right)$$ - </td> - </tr> - <tr> - <td><strong>Manhattan Distance</strong></td> - <td> - $$d(\x, \y) = \sum_{i=1}^n \left\vert x_i - y_i \right\vert$$ - </td> - </tr> - <tr> - <td><strong>Minkowski Distance</strong></td> - <td> - $$d(\x, \y) = \left( \sum_{i=1}^{n} \left\vert x_i - y_i \right\vert^p \right)^{\rfrac{1}{p}}$$ - </td> - </tr> - <tr> - <td><strong>Tanimoto Distance</strong></td> - <td> - $$d(\x, \y) = 1 - \frac{\x^T\y}{\Vert \x \Vert^2 + \Vert \y \Vert^2 - \x^T\y}$$ - with $\x$ and $\y$ being bit-vectors - </td> - </tr> - </tbody> - </table> - -<h2 id="custom-implementation">Custom Implementation</h2> - -<p>You can create your own distance metric by implementing the <code>DistanceMetric</code> trait.</p> - -<div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="k">class</span> <span class="nc">MyDistance</span> <span class="k">extends</span> <span class="nc">DistanceMetric</span> <span class="o">{</span> - <span class="k">override</span> <span class="k">def</span> <span 
class="n">distance</span><span class="o">(</span><span class="n">a</span><span class="k">:</span> <span class="kt">Vector</span><span class="o">,</span> <span class="n">b</span><span class="k">:</span> <span class="kt">Vector</span><span class="o">)</span> <span class="k">=</span> <span class="o">...</span> <span class="c1">// your implementation for distance metric</span> -<span class="o">}</span> - -<span class="k">object</span> <span class="nc">MyDistance</span> <span class="o">{</span> - <span class="k">def</span> <span class="n">apply</span><span class="o">()</span> <span class="k">=</span> <span class="k">new</span> <span class="nc">MyDistance</span><span class="o">()</span> -<span class="o">}</span> - -<span class="k">val</span> <span class="n">myMetric</span> <span class="k">=</span> <span class="nc">MyDistance</span><span class="o">()</span></code></pre></div> - - - </div> - - <div class="col-sm-10 col-sm-offset-1"> - <!-- Disqus thread and some vertical offset --> - <div style="margin-top: 75px; margin-bottom: 50px" id="disqus_thread"></div> - </div> -</div> - - </div><!-- /.container --> - - <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> - <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> - <!-- Include all compiled plugins (below), or include individual files as needed --> - <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> - <script src="http://flink.apache.org/docs/0.9/page/js/codetabs.js"></script> - - <!-- Google Analytics --> - <script> - (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ - (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), - m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) - })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); - - ga('create', 'UA-52545728-1', 'auto'); - ga('send', 'pageview'); - </script> - - <!-- Disqus --> - 
<script type="text/javascript"> - var disqus_shortname = 'stratosphere-eu'; - (function() { - var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; - dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; - (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); - })(); -</script> - </body> -</html>
