[28/51] [partial] madlib-site git commit: Doc: Add v1.15.1 documentation

nkak Mon, 15 Oct 2018 11:48:51 -0700

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/af0e5f14/docs/v1.15.1/group__grp__bfs.html
----------------------------------------------------------------------
diff --git a/docs/v1.15.1/group__grp__bfs.html 
b/docs/v1.15.1/group__grp__bfs.html
new file mode 100644
index 0000000..0bf55a8
--- /dev/null
+++ b/docs/v1.15.1/group__grp__bfs.html
@@ -0,0 +1,421 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.14"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data 
mining,deep learning,ensemble methods,data science,market basket 
analysis,affinity analysis,pca,lda,regression,elastic net,huber 
white,proportional hazards,k-means,latent dirichlet allocation,bayes,support 
vector machines,svm"/>
+<title>MADlib: Breadth-First Search</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+  $(document).ready(initResizable);
+/* @license-end */</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+  $(document).ready(function() { init_search(); });
+/* @license-end */
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" async 
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" 
src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.15.1</span>
+   </div>
+   <div id="projectbrief">User Documentation for Apache MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" 
href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" 
border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.14 -->
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+/* @license-end */
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+$(document).ready(function(){initNavTree('group__grp__bfs.html','');});
+/* @license-end */
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Breadth-First Search<div class="ingroups"><a class="el" 
href="group__grp__graph.html">Graph</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li>
+<a href="#bfs">Breadth-First Search</a> </li>
+<li>
+<a href="#notes">Notes</a> </li>
+<li>
+<a href="#examples">Examples</a> </li>
+<li>
+<a href="#literature">Literature</a> </li>
+</ul>
+</div><p>Given a graph and a source vertex, the breadth-first search (BFS) 
algorithm finds all nodes reachable from the source vertex by searching / 
traversing the graph in a breadth-first manner.</p>
+<p><a class="anchor" id="bfs"></a></p><dl class="section 
user"><dt>BFS</dt><dd><pre class="syntax">
+graph_bfs( vertex_table,
+           vertex_id,
+           edge_table,
+           edge_args,
+           source_vertex,
+           out_table,
+           max_distance,
+           directed,
+           grouping_cols
+          )
+</pre></dd></dl>
+<p><b>Arguments</b> </p><dl class="arglist">
+<dt>vertex_table </dt>
+<dd><p class="startdd">TEXT. Name of the table containing the vertex data for 
the graph. Must contain the column specified in the 'vertex_id' parameter 
below.</p>
+<p class="enddd"></p>
+</dd>
+<dt>vertex_id </dt>
+<dd><p class="startdd">TEXT, default = 'id'. Name of the column in 
'vertex_table' containing vertex ids. The vertex ids are of type INTEGER with 
no duplicates. They do not need to be contiguous.</p>
+<p class="enddd"></p>
+</dd>
+<dt>edge_table </dt>
+<dd><p class="startdd">TEXT. Name of the table containing the edge data. The 
edge table must contain columns for source vertex and destination vertex. 
Column naming convention is described below in the 'edge_args' parameter. In 
addition to vertex columns, if grouping is used then the columns specified in 
the 'grouping_cols' parameter must be present. </p>
+<p class="enddd"></p>
+</dd>
+<dt>edge_args </dt>
+<dd><p class="startdd">TEXT. A comma-delimited string containing multiple 
named arguments of the form "name=value". The following parameters are 
supported for this string argument:</p><ul>
+<li>src (INTEGER): Name of the column containing the source vertex ids in the 
edge table. Default column name is 'src'. (This is not to be confused with the 
'source_vertex' argument passed to the BFS function.)</li>
+<li>dest (INTEGER): Name of the column containing the destination vertex ids 
in the edge table. Default column name is 'dest'.</li>
+</ul>
+<p class="enddd"></p>
+</dd>
+<dt>source_vertex </dt>
+<dd><p class="startdd">INTEGER. The source vertex id for the algorithm to 
start. This vertex id must exist in the 'vertex_id' column of 
'vertex_table'.</p>
+<p class="enddd"></p>
+</dd>
+<dt>out_table </dt>
+<dd><p class="startdd">TEXT. Name of the table to store the result of BFS. It 
contains a row for every vertex that is reachable from the source_vertex. In 
the presence of grouping columns, only those edges are used for which there are 
no NULL values in any grouping column. The output table will have the following 
columns (in addition to the grouping columns):</p><ul>
+<li>vertex_id : The id for any node reachable from source_vertex in addition 
to the source_vertex. Will use the input parameter 'vertex_id' for column 
naming.</li>
+<li>dist : The distance in number of edges (or hops) from the source_vertex to 
where this vertex is located.</li>
+<li>parent : The parent of this vertex in BFS traversal of the graph from 
source_vertex. Will use 'parent' for column naming. For the case where 
vertex_id = source_vertex, the value for parent is NULL.</li>
+</ul>
+<p>A summary table named &lt;out_table&gt;_summary is also created. This is an 
internal table that keeps a record of the input parameters. </p>
+<p class="enddd"></p>
+</dd>
+<dt>max_distance (optional) </dt>
+<dd><p class="startdd">INT, default = NULL. Maximum distance to traverse from 
the source vertex. When this value is NULL, the search traverses until it reaches a leaf node. 
E.g., if set to 1 will return only adjacent vertices, if set to 7 will return 
vertices up to a maximum distance of 7 vertices away.</p>
+<p class="enddd"></p>
+</dd>
+<dt>directed (optional) </dt>
+<dd><p class="startdd">BOOLEAN, default = FALSE. If TRUE the graph will be 
treated as directed, else it will be treated as an undirected graph.</p>
+<p class="enddd"></p>
+</dd>
+<dt>grouping_cols (optional) </dt>
+<dd>TEXT, default = NULL. A comma-separated list of columns used to group the 
input into discrete subgraphs. These columns must exist in the edge table. When 
this value is NULL, no grouping is used and a single BFS result is generated. 
<dl class="section note"><dt>Note</dt><dd>Expressions are not currently 
supported for 'grouping_cols'.</dd></dl>
+</dd>
+</dl>
+<p><a class="anchor" id="notes"></a></p><dl class="section 
user"><dt>Notes</dt><dd></dd></dl>
+<p>The graph_bfs function is a SQL implementation of the well-known 
breadth-first search algorithm [1] modified appropriately for a relational 
database. It will find any node in the graph reachable from the source_vertex 
only once. If a node is reachable by many different paths from the 
source_vertex (i.e. has more than one parent), then only one of those parents 
is present in the output table. The BFS result will, in general, be different 
for different choices of source_vertex.</p>
+<p><a class="anchor" id="examples"></a></p><dl class="section 
user"><dt>Examples</dt><dd></dd></dl>
+<ol type="1">
+<li>Create vertex and edge tables to represent the graph: <pre class="syntax">
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+        id INTEGER
+        );
+CREATE TABLE edge(
+        src INTEGER,
+        dest INTEGER
+        );
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6),
+(7),
+(8),
+(9),
+(10),
+(11)
+;
+INSERT INTO edge VALUES
+(0, 5),
+(1, 0),
+(1, 3),
+(2, 6),
+(3, 4),
+(3, 5),
+(4, 2),
+(8, 9),
+(9, 10),
+(9, 11),
+(10, 8);
+</pre></li>
+<li>Traverse undirected graph from vertex 3: <pre class="syntax">
+DROP TABLE IF EXISTS out, out_summary;
+SELECT madlib.graph_bfs(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use 
default naming)
+                         'edge',        -- Edge table
+                         NULL,          -- Edge arguments (NULL means use 
default naming)
+                         3,             -- Source vertex for BFS
+                         'out');        -- Output table of nodes reachable 
from source_vertex
+                         -- Default values used for the other arguments
+SELECT * FROM out ORDER BY dist,id;
+</pre> <pre class="result">
+ id | dist | parent
+----+------+--------
+  3 |    0 |
+  1 |    1 |      3
+  4 |    1 |      3
+  5 |    1 |      3
+  0 |    2 |      1
+  2 |    2 |      4
+  6 |    3 |      2
+(7 rows)
+</pre> <pre class="syntax">
+SELECT * FROM out_summary;
+</pre> <pre class="result">
+ vertex_table | vertex_id | edge_table | edge_args | source_vertex | out_table 
| max_distance | directed | grouping_cols
+--------------+-----------+------------+-----------+---------------+-----------+--------------+----------+---------------
+ vertex       | NULL      | edge       | NULL      |             3 | out       
|              |          | NULL
+(1 row)
+</pre></li>
+<li>In this example, we use max_distance to limit the search distance. <pre 
class="syntax">
+DROP TABLE IF EXISTS out_max, out_max_summary;
+SELECT madlib.graph_bfs(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use 
default naming)
+                         'edge',        -- Edge table
+                         NULL,          -- Edge arguments (NULL means use 
default naming)
+                         3,             -- Source vertex for BFS
+                         'out_max',     -- Output table of nodes reachable 
from source_vertex
+                         2);            -- Maximum distance to traverse from 
source_vertex
+                         -- Default values used for the other arguments
+SELECT * FROM out_max ORDER BY dist,id;
+</pre> <pre class="result">
+ id | dist | parent
+----+------+--------
+  3 |    0 |
+  1 |    1 |      3
+  4 |    1 |      3
+  5 |    1 |      3
+  0 |    2 |      1
+  2 |    2 |      4
+(6 rows)
+</pre></li>
+<li>Now let's do an example using different column names in the tables (i.e., 
not the defaults). Create the vertex and edge tables: <pre class="syntax">
+DROP TABLE IF EXISTS vertex_alt, edge_alt;
+CREATE TABLE vertex_alt AS SELECT id AS v_id FROM vertex;
+CREATE TABLE edge_alt AS SELECT src AS n1, dest AS n2 FROM edge;
+</pre></li>
+<li>Run BFS from vertex 8: <pre class="syntax">
+DROP TABLE IF EXISTS out_alt, out_alt_summary;
+SELECT madlib.graph_bfs(
+                         'vertex_alt',                  -- Vertex table
+                         'v_id',                        -- Vertex id column 
(NULL means use default naming)
+                         'edge_alt',                    -- Edge table
+                         'src=n1, dest=n2',             -- Edge arguments 
(NULL means use default naming)
+                         8,                             -- Source vertex for 
BFS
+                         'out_alt');                    -- Output table of 
nodes reachable from source_vertex
+SELECT * FROM out_alt ORDER BY v_id;
+</pre> <pre class="result">
+ v_id | dist | parent
+------+------+--------
+    8 |    0 |
+    9 |    1 |      8
+   10 |    1 |      8
+   11 |    2 |      9
+</pre></li>
+<li>Now we show an example where the graph is treated as a directed graph. 
<pre class="syntax">
+DROP TABLE IF EXISTS out_alt_dir, out_alt_dir_summary;
+SELECT madlib.graph_bfs(
+                         'vertex_alt',                  -- Vertex table
+                         'v_id',                        -- Vertex id column 
(NULL means use default naming)
+                         'edge_alt',                    -- Edge table
+                         'src=n1, dest=n2',             -- Edge arguments 
(NULL means use default naming)
+                         8,                             -- Source vertex for 
BFS
+                         'out_alt_dir',                 -- Output table of 
nodes reachable from source_vertex
+                         NULL,                          -- Maximum distance to 
traverse from source_vertex
+                         TRUE);                         -- Flag for specifying 
directed graph
+SELECT * FROM out_alt_dir ORDER BY v_id;
+</pre> <pre class="result">
+ v_id | dist | parent
+------+------+--------
+    8 |    0 |
+    9 |    1 |      8
+   10 |    2 |      9
+   11 |    2 |      9
+(4 rows)
+</pre> Notice that, with the graph being treated as directed, the parent of 
v_id=10 is now vertex 9 and not 8 as in the undirected case.</li>
+<li>Create a graph with 2 groups: <pre class="syntax">
+DROP TABLE IF EXISTS edge_gr;
+CREATE TABLE edge_gr(
+                  g1 INTEGER,
+                  g2 TEXT,
+                  src INTEGER,
+                  dest INTEGER
+                );
+INSERT INTO edge_gr VALUES
+(100, 'a', 0, 5),
+(100, 'a', 1, 0),
+(100, 'a', 1, 3),
+(100, 'a', 2, 6),
+(100, 'a', 3, 4),
+(100, 'a', 3, 5),
+(100, 'a', 4, 2),
+(100, 'a', 8, 9),
+(100, 'a', 9, 10),
+(100, 'a', 9, 11),
+(100, 'a', 10, 8),
+(202, 'c', 8, 9),
+(202, 'c', 9, 10),
+(202, 'c', 9, 11),
+(202, 'c', 10, 8)
+;
+</pre></li>
+<li>Run BFS for all groups from a given source_vertex. <pre class="syntax">
+DROP TABLE IF EXISTS out_gr, out_gr_summary;
+SELECT madlib.graph_bfs(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use 
default naming)
+                         'edge_gr',     -- Edge table
+                         NULL,          -- Edge arguments (NULL means use 
default naming)
+                         8,             -- Source vertex for BFS
+                         'out_gr',      -- Output table of nodes reachable 
from source_vertex
+                         NULL,          -- Maximum distance to traverse from 
source_vertex
+                         NULL,          -- Flag for specifying directed graph
+                         'g1,g2'        -- Grouping columns
+);
+SELECT * FROM out_gr ORDER BY g1,g2,dist,id;
+</pre> <pre class="result">
+ g1  | g2 | id | dist | parent
+-----+----+----+------+--------
+ 100 | a  |  8 |    0 |
+ 100 | a  |  9 |    1 |      8
+ 100 | a  | 10 |    1 |      8
+ 100 | a  | 11 |    2 |      9
+ 202 | c  |  8 |    0 |
+ 202 | c  |  9 |    1 |      8
+ 202 | c  | 10 |    1 |      8
+ 202 | c  | 11 |    2 |      9
+(8 rows)
+</pre> If source_vertex is not present in a group, then that group will not 
appear in the output table. <pre class="syntax">
+DROP TABLE IF EXISTS out_gr, out_gr_summary;
+SELECT madlib.graph_bfs(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use 
default naming)
+                         'edge_gr',     -- Edge table
+                         NULL,          -- Edge arguments (NULL means use 
default naming)
+                         3,             -- Source vertex for BFS
+                         'out_gr',      -- Output table of nodes reachable 
from source_vertex
+                         NULL,          -- Maximum distance to traverse from 
source_vertex
+                         NULL,          -- Flag for specifying directed graph
+                         'g1,g2'        -- Grouping columns
+);
+SELECT * FROM out_gr ORDER BY g1,g2,dist,id;
+</pre> <pre class="result">
+ g1  | g2 | id | dist | parent
+-----+----+----+------+--------
+ 100 | a  |  3 |    0 |
+ 100 | a  |  1 |    1 |      3
+ 100 | a  |  4 |    1 |      3
+ 100 | a  |  5 |    1 |      3
+ 100 | a  |  0 |    2 |      1
+ 100 | a  |  2 |    2 |      4
+ 100 | a  |  6 |    3 |      2
+(7 rows)
+</pre></li>
+</ol>
+<p><a class="anchor" id="literature"></a></p><dl class="section 
user"><dt>Literature</dt><dd></dd></dl>
+<p>[1] Breadth-first Search algorithm. <a 
href="https://en.wikipedia.org/wiki/Breadth-first_search">https://en.wikipedia.org/wiki/Breadth-first_search</a>
 </p>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Mon Oct 15 2018 11:24:30 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.14 </li>
+  </ul>
+</div>
+</body>
+</html>


http://git-wip-us.apache.org/repos/asf/madlib-site/blob/af0e5f14/docs/v1.15.1/group__grp__cg.html
----------------------------------------------------------------------
diff --git a/docs/v1.15.1/group__grp__cg.html b/docs/v1.15.1/group__grp__cg.html
new file mode 100644
index 0000000..737195c
--- /dev/null
+++ b/docs/v1.15.1/group__grp__cg.html
@@ -0,0 +1,192 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.14"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data 
mining,deep learning,ensemble methods,data science,market basket 
analysis,affinity analysis,pca,lda,regression,elastic net,huber 
white,proportional hazards,k-means,latent dirichlet allocation,bayes,support 
vector machines,svm"/>
+<title>MADlib: Conjugate Gradient</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+  $(document).ready(initResizable);
+/* @license-end */</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+  $(document).ready(function() { init_search(); });
+/* @license-end */
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" async 
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" 
src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.15.1</span>
+   </div>
+   <div id="projectbrief">User Documentation for Apache MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" 
href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" 
border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.14 -->
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+/* @license-end */
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+$(document).ready(function(){initNavTree('group__grp__cg.html','');});
+/* @license-end */
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Conjugate Gradient<div class="ingroups"><a class="el" 
href="group__grp__early__stage.html">Early Stage Development</a></div></div>  
</div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li>
+<a href="#syntax">Function Syntax</a> </li>
+<li>
+<a href="#examples">Examples</a> </li>
+<li>
+<a href="#related">Related Topics</a> </li>
+</ul>
+</div><dl class="section warning"><dt>Warning</dt><dd><em> This MADlib method 
is still in early stage development. There may be some issues that will be 
addressed in a future version. Interface and implementation are subject to 
change. </em></dd></dl>
+<p>This function uses the iterative conjugate gradient method [1] to find a 
solution to the linear system: </p><p class="formulaDsp">
+\[ \boldsymbol Ax = \boldsymbol b \]
+</p>
+<p> where \( \boldsymbol A \) is a symmetric, positive definite matrix and 
\(x\) and \( \boldsymbol b \) are vectors.</p>
+<p><a class="anchor" id="syntax"></a></p><dl class="section user"><dt>Function 
Syntax</dt><dd>Conjugate gradient returns x as an array. It has the following 
syntax.</dd></dl>
+<pre class="syntax">
+conjugate_gradient( table_name,
+                    name_of_row_values_col,
+                    name_of_row_number_col,
+                    array_of_b_values,
+                    desired_precision
+                  )
+</pre><p>Matrix \( \boldsymbol A \) is assumed to be stored in a table where 
each row consists of at least two columns: the row number and an array 
containing the values of that row: </p><pre>{TABLE|VIEW} <em>matrix_A</em> (
+    <em>row_number</em> FLOAT,
+    <em>row_values</em> FLOAT[]
+)</pre><p> The number of elements in each row should be the same.</p>
+<p>\( \boldsymbol b \) is passed as a FLOAT[] to the function.</p>
+<p><a class="anchor" id="examples"></a></p><dl class="section 
user"><dt>Examples</dt><dd><ol type="1">
+<li>Construct matrix A according to structure. <pre class="example">
+SELECT * FROM data;
+</pre> Result: <pre class="result">
+ row_num | row_val
+&#160;--------+---------
+       1 | {2,1}
+       2 | {1,4}
+(2 rows)
+</pre></li>
+<li>Call the conjugate gradient function. <pre class="example">
+SELECT conjugate_gradient( 'data',
+                           'row_val',
+                           'row_num',
+                           '{2,1}',
+                           1E-6,1
+                         );
+</pre> <pre class="result">
+INFO:  COMPUTE RESIDUAL ERROR 14.5655661859659
+INFO:  ERROR 0.144934004246004
+INFO:  ERROR 3.12963615962926e-31
+INFO:  TEST FINAL ERROR 2.90029642185163e-29
+    conjugate_gradient
+&#160;--------------------------
+ {1,-1.31838984174237e-15}
+(1 row)
+</pre></li>
+</ol>
+</dd></dl>
+<p><a class="anchor" id="literature"></a></p><dl class="section 
user"><dt>Literature</dt><dd>[1] "Conjugate gradient method" Wikipedia - <a 
href="http://en.wikipedia.org/wiki/Conjugate_gradient_method">http://en.wikipedia.org/wiki/Conjugate_gradient_method</a></dd></dl>
+<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related 
Topics</dt><dd>File <a class="el" href="conjugate__gradient_8sql__in.html" 
title="SQL function computing Conjugate Gradient. 
">conjugate_gradient.sql_in</a> documenting the SQL function. </dd></dl>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Mon Oct 15 2018 11:24:30 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.14 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/af0e5f14/docs/v1.15.1/group__grp__clustered__errors.html
----------------------------------------------------------------------
diff --git a/docs/v1.15.1/group__grp__clustered__errors.html 
b/docs/v1.15.1/group__grp__clustered__errors.html
new file mode 100644
index 0000000..c2a3edb
--- /dev/null
+++ b/docs/v1.15.1/group__grp__clustered__errors.html
@@ -0,0 +1,412 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";>
+<html xmlns="http://www.w3.org/1999/xhtml";>
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.14"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data 
mining,deep learning,ensemble methods,data science,market basket 
analysis,affinity analysis,pca,lda,regression,elastic net,huber 
white,proportional hazards,k-means,latent dirichlet allocation,bayes,support 
vector machines,svm"/>
+<title>MADlib: Clustered Variance</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+  $(document).ready(initResizable);
+/* @license-end */</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+  $(document).ready(function() { init_search(); });
+/* @license-end */
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" async 
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/MathJax.js";></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org";><img alt="Logo" 
src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.15.1</span>
+   </div>
+   <div id="projectbrief">User Documentation for Apache MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" 
href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" 
border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.14 -->
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+/* @license-end */
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+$(document).ready(function(){initNavTree('group__grp__clustered__errors.html','');});
+/* @license-end */
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Clustered Variance<div class="ingroups"><a class="el" 
href="group__grp__super.html">Supervised Learning</a> &raquo; <a class="el" 
href="group__grp__regml.html">Regression Models</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li>
+<a href="#train_linregr">Clustered Variance Linear Regression Training 
Function</a> </li>
+<li>
+<a href="#train_logregr">Clustered Variance Logistic Regression Training 
Function</a> </li>
+<li>
+<a href="#train_mlogregr">Clustered Variance Multinomial Logistic Regression 
Training Function</a> </li>
+<li>
+<a href="#train_cox">Clustered Variance for Cox Proportional Hazards model</a> 
</li>
+<li>
+<a href="#examples">Examples</a> </li>
+<li>
+<a href="#notes">Notes</a> </li>
+<li>
+<a href="#background">Technical Background</a> </li>
+<li>
+<a href="#related">Related Topics</a> </li>
+</ul>
+</div><p>The Clustered Variance module adjusts standard errors for clustering. 
For example, replicating a dataset 100 times should not increase the precision 
of parameter estimates, but performing this procedure with the IID assumption 
will actually do this. As another example, in economics of education research, 
it is reasonable to expect that the error terms for children in the same class 
are not independent. Clustering standard errors can correct for this.</p>
+<p>The MADlib Clustered Variance module includes functions to calculate the 
clustered variance for linear, logistic, and multinomial logistic regression 
models, as well as for Cox proportional hazards models.</p>
+<p><a class="anchor" id="train_linregr"></a></p><dl class="section 
user"><dt>Clustered Variance Linear Regression Training 
Function</dt><dd></dd></dl>
+<p>The clustered variance linear regression training function has the 
following syntax. </p><pre class="syntax">
+clustered_variance_linregr ( source_table,
+                             out_table,
+                             dependent_varname,
+                             independent_varname,
+                             clustervar,
+                             grouping_cols
+                           )
+</pre><p> <b>Arguments</b> </p><dl class="arglist">
+<dt>source_table </dt>
+<dd><p class="startdd">TEXT. The name of the table containing the input 
data.</p>
+<p class="enddd"></p>
+</dd>
+<dt>out_table </dt>
+<dd><p class="startdd">VARCHAR. Name of the generated table containing the 
output model. The output table contains the following columns. </p><table 
class="output">
+<tr>
+<th>coef </th><td>DOUBLE PRECISION[]. Vector of the coefficients of the 
regression.  </td></tr>
+<tr>
+<th>std_err </th><td>DOUBLE PRECISION[]. Vector of the standard error of the 
coefficients.  </td></tr>
+<tr>
+<th>t_stats </th><td>DOUBLE PRECISION[]. Vector of the t-stats of the 
coefficients.  </td></tr>
+<tr>
+<th>p_values </th><td>DOUBLE PRECISION[]. Vector of the p-values of the 
coefficients.  </td></tr>
+</table>
+<p>A summary table named &lt;out_table&gt;_summary is also created, which is 
the same as the summary table created by linregr_train function. Please refer 
to the documentation for linear regression for details.</p>
+<p></p>
+<p class="enddd"></p>
+</dd>
+<dt>dependent_varname </dt>
+<dd>TEXT. An expression to evaluate for the dependent variable. </dd>
+<dt>independent_varname </dt>
+<dd>TEXT. An expression to evaluate for the independent variables. </dd>
+<dt>clustervar </dt>
+<dd>TEXT. A comma-separated list of the columns to use as cluster variables. 
</dd>
+<dt>grouping_cols (optional) </dt>
+<dd>TEXT, default: NULL. <em>Not currently implemented. Any non-NULL value is 
ignored.</em> An expression list used to group the input dataset into discrete 
groups, running one regression per group. Similar to the SQL GROUP BY clause. 
When this value is null, no grouping is used and a single result model is 
generated. </dd>
+</dl>
+<p><a class="anchor" id="train_logregr"></a></p><dl class="section 
user"><dt>Clustered Variance Logistic Regression Training 
Function</dt><dd></dd></dl>
+<p>The clustered variance logistic regression training function has the 
following syntax. </p><pre class="syntax">
+clustered_variance_logregr( source_table,
+                            out_table,
+                            dependent_varname,
+                            independent_varname,
+                            clustervar,
+                            grouping_cols,
+                            max_iter,
+                            optimizer,
+                            tolerance,
+                            verbose_mode
+                          )
+</pre><p> <b>Arguments</b> </p><dl class="arglist">
+<dt>source_table </dt>
+<dd>TEXT. The name of the table containing the input data. </dd>
+<dt>out_table </dt>
+<dd><p class="startdd">VARCHAR. Name of the generated table containing the 
output model. The output table has the following columns: </p><table 
class="output">
+<tr>
+<th>coef </th><td>Vector of the coefficients of the regression.  </td></tr>
+<tr>
+<th>std_err </th><td>Vector of the standard error of the coefficients.  
</td></tr>
+<tr>
+<th>z_stats </th><td>Vector of the z-stats of the coefficients.  </td></tr>
+<tr>
+<th>p_values </th><td>Vector of the p-values of the coefficients.  </td></tr>
+</table>
+<p>A summary table named &lt;out_table&gt;_summary is also created, which is 
the same as the summary table created by logregr_train function. Please refer 
to the documentation for logistic regression for details.</p>
+<p class="enddd"></p>
+</dd>
+<dt>dependent_varname </dt>
+<dd>TEXT. An expression to evaluate for the dependent variable. </dd>
+<dt>independent_varname </dt>
+<dd>TEXT. An expression to evaluate for the independent variable. </dd>
+<dt>clustervar </dt>
+<dd>TEXT. A comma-separated list of columns to use as cluster variables. </dd>
+<dt>grouping_cols (optional) </dt>
+<dd>TEXT, default: NULL. <em>Not yet implemented. Any non-NULL values are 
ignored.</em> An expression list used to group the input dataset into discrete 
groups, running one regression per group. Similar to the SQL GROUP BY clause. 
When this value is NULL, no grouping is used and a single result model is 
generated. </dd>
+<dt>max_iter (optional) </dt>
+<dd>INTEGER, default: 20. The maximum number of iterations that are allowed. 
</dd>
+<dt>optimizer (optional) </dt>
+<dd>TEXT, default: 'irls'. The name of the optimizer to use: <ul>
+<li>
+'newton' or 'irls': Iteratively reweighted least squares </li>
+<li>
+'cg': conjugate gradient </li>
+<li>
+'igd': incremental gradient descent. </li>
+</ul>
+</dd>
+<dt>tolerance (optional) </dt>
+<dd>FLOAT8, default: 0.0001. The difference between log-likelihood values in 
successive iterations that should indicate convergence. A zero disables the 
convergence criterion, so that execution stops after <em>n</em> iterations have 
completed. </dd>
+<dt>verbose_mode (optional) </dt>
+<dd>BOOLEAN, default FALSE. Provides verbose output of the results of 
training. </dd>
+</dl>
+<p><a class="anchor" id="train_mlogregr"></a></p><dl class="section 
user"><dt>Clustered Variance Multinomial Logistic Regression Training 
Function</dt><dd></dd></dl>
+<pre class="syntax">
+clustered_variance_mlogregr( source_table,
+                             out_table,
+                             dependent_varname,
+                             independent_varname,
+                             cluster_varname,
+                             ref_category,
+                             grouping_cols,
+                             optimizer_params,
+                             verbose_mode
+                           )
+</pre><p> <b>Arguments</b> </p><dl class="arglist">
+<dt>source_table </dt>
+<dd>TEXT. The name of the table containing the input data. </dd>
+<dt>out_table </dt>
+<dd><p class="startdd">TEXT. The name of the table where the regression model 
will be stored. The output table has the following columns: </p><table 
class="output">
+<tr>
+<th>category </th><td>The category.  </td></tr>
+<tr>
+<th>ref_category </th><td>The reference category used for modeling.  
</td></tr>
+<tr>
+<th>coef </th><td>Vector of the coefficients of the regression.  </td></tr>
+<tr>
+<th>std_err </th><td>Vector of the standard error of the coefficients.  
</td></tr>
+<tr>
+<th>z_stats </th><td>Vector of the z-stats of the coefficients.  </td></tr>
+<tr>
+<th>p_values </th><td>Vector of the p-values of the coefficients.  </td></tr>
+</table>
+<p class="enddd">A summary table named &lt;out_table&gt;_summary is also 
created, which is the same as the summary table created by mlogregr_train 
function. Please refer to the documentation for multinomial logistic regression 
for details.  </p>
+</dd>
+<dt>dependent_varname </dt>
+<dd>TEXT. An expression to evaluate for the dependent variable. </dd>
+<dt>independent_varname </dt>
+<dd>TEXT. An expression to evaluate for the independent variable. </dd>
+<dt>cluster_varname </dt>
+<dd>TEXT. A comma-separated list of columns to use as cluster variables. </dd>
+<dt>ref_category (optional) </dt>
+<dd>INTEGER. Reference category in the range [0, num_category). </dd>
+<dt>grouping_cols (optional) </dt>
+<dd>TEXT, default: NULL. <em>Not yet implemented. Any non-NULL values are 
ignored.</em> A comma-separated list of columns to use as grouping variables. 
</dd>
+<dt>optimizer_params (optional) </dt>
+<dd>TEXT, default: NULL, which uses the default values of optimizer 
parameters: max_iter=20, optimizer='newton', tolerance=1e-4. It should be a 
string that contains pairs of 'key=value' separated by commas. </dd>
+<dt>verbose_mode (optional) </dt>
+<dd>BOOLEAN, default FALSE. If TRUE, detailed information is printed when 
computing logistic regression. </dd>
+</dl>
+<p><a class="anchor" id="train_cox"></a></p><dl class="section 
user"><dt>Clustered Variance for Cox Proportional Hazards 
model</dt><dd></dd></dl>
+<p>The clustered robust variance estimator function for the Cox Proportional 
Hazards model has the following syntax. </p><pre class="syntax">
+clustered_variance_coxph(model_table, output_table, clustervar)
+</pre><p><b>Arguments</b> </p><dl class="arglist">
+<dt>model_table </dt>
+<dd>TEXT. The name of the model table, which is exactly the same as the 
'output_table' parameter of <a class="el" 
href="cox__prop__hazards_8sql__in.html#a737450bbfe0f10204b0074a9d45b0cef" 
title="Compute cox-regression coefficients and diagnostic statistics. 
">coxph_train()</a> function. </dd>
+<dt>output_table </dt>
+<dd>TEXT. The name of the table where the output is saved. It has the 
following columns: <table class="output">
+<tr>
+<th>coef </th><td>FLOAT8[]. Vector of the coefficients.  </td></tr>
+<tr>
+<th>loglikelihood </th><td>FLOAT8. Log-likelihood value of the MLE estimate.  
</td></tr>
+<tr>
+<th>std_err </th><td>FLOAT8[]. Vector of the standard error of the 
coefficients.  </td></tr>
+<tr>
+<th>clustervar </th><td>TEXT. A comma-separated list of columns to use as 
cluster variables.  </td></tr>
+<tr>
+<th>clustered_se </th><td>FLOAT8[]. Vector of the robust standard errors of 
the coefficients.  </td></tr>
+<tr>
+<th>clustered_z </th><td>FLOAT8[]. Vector of the robust z-stats of the 
coefficients.  </td></tr>
+<tr>
+<th>clustered_p </th><td>FLOAT8[]. Vector of the robust p-values of the 
coefficients.  </td></tr>
+<tr>
+<th>hessian </th><td>FLOAT8[]. The Hessian matrix.  </td></tr>
+</table>
+</dd>
+<dt>clustervar </dt>
+<dd>TEXT. A comma-separated list of columns to use as cluster variables. </dd>
+</dl>
+<p><a class="anchor" id="examples"></a></p><dl class="section 
user"><dt>Examples</dt><dd></dd></dl>
+<ol type="1">
+<li>View online help for the clustered variance linear regression function. 
<pre class="example">
+SELECT madlib.clustered_variance_linregr();
+</pre></li>
+<li>Run the linear regression function and view the results. <pre 
class="example">
+DROP TABLE IF EXISTS out_table;
+SELECT madlib.clustered_variance_linregr( 'abalone',
+                                          'out_table',
+                                          'rings',
+                                          'ARRAY[1, diameter, length, width]',
+                                          'sex',
+                                          NULL
+                                        );
+SELECT * FROM out_table;
+</pre></li>
+<li>View online help for the clustered variance logistic regression function. 
<pre class="example">
+SELECT madlib.clustered_variance_logregr();
+</pre></li>
+<li>Run the logistic regression function and view the results. <pre 
class="example">
+DROP TABLE IF EXISTS out_table;
+SELECT madlib.clustered_variance_logregr( 'abalone',
+                                          'out_table',
+                                          'rings &lt; 10',
+                                          'ARRAY[1, diameter, length, width]',
+                                          'sex'
+                                        );
+SELECT * FROM out_table;
+</pre></li>
+<li>View online help for the clustered variance multinomial logistic 
regression function. <pre class="example">
+SELECT madlib.clustered_variance_mlogregr();
+</pre></li>
+<li>Run the multinomial logistic regression and view the results. <pre 
class="example">
+DROP TABLE IF EXISTS out_table;
+SELECT madlib.clustered_variance_mlogregr( 'abalone',
+                                           'out_table',
+                                           'CASE WHEN rings &lt; 10 THEN 1 
ELSE 0 END',
+                                           'ARRAY[1, diameter, length, width]',
+                                           'sex',
+                                           0
+                                         );
+SELECT * FROM out_table;
+</pre></li>
+<li>Run the Cox Proportional Hazards regression and compute the clustered 
robust estimator. <pre class="example">
+DROP TABLE IF EXISTS lung_cl_out;
+DROP TABLE IF EXISTS lung_out;
+DROP TABLE IF EXISTS lung_out_summary;
+SELECT madlib.coxph_train('lung',
+                          'lung_out',
+                          'time',
+                          'array[age, "ph.ecog"]',
+                          'TRUE',
+                          NULL,
+                          NULL);
+SELECT madlib.clustered_variance_coxph('lung_out',
+                                       'lung_cl_out',
+                                       '"ph.karno"');
+SELECT * FROM lung_cl_out;
+</pre></li>
+</ol>
+<p><a class="anchor" id="notes"></a></p><dl class="section 
user"><dt>Notes</dt><dd></dd></dl>
+<ul>
+<li>Note that we need to manually include an intercept term in the independent 
variable expression. The NULL value of <em>grouping_cols</em> means that there is 
no grouping in the calculation.</li>
+</ul>
+<p><a class="anchor" id="background"></a></p><dl class="section 
user"><dt>Technical Background</dt><dd></dd></dl>
+<p>Assume that the data can be separated into \(m\) clusters. Usually this can 
be done by grouping the data table according to one or multiple columns.</p>
+<p>The estimator has a similar form to the usual sandwich estimator </p><p 
class="formulaDsp">
+\[ S(\vec{c}) = B(\vec{c}) M(\vec{c}) B(\vec{c}) \]
+</p>
+<p>The bread part is the same as Huber-White sandwich estimator </p><p 
class="formulaDsp">
+\begin{eqnarray} B(\vec{c}) &amp; = &amp; \left(-\sum_{i=1}^{n} H(y_i, 
\vec{x}_i, \vec{c})\right)^{-1}\\ &amp; = &amp; 
\left(-\sum_{i=1}^{n}\frac{\partial^2 l(y_i, \vec{x}_i, \vec{c})}{\partial 
c_\alpha \partial c_\beta}\right)^{-1} \end{eqnarray}
+</p>
+<p> where \(H\) is the hessian matrix, which is the second derivative of the 
target function </p><p class="formulaDsp">
+\[ L(\vec{c}) = \sum_{i=1}^n l(y_i, \vec{x}_i, \vec{c})\ . \]
+</p>
+<p>The meat part is different </p><p class="formulaDsp">
+\[ M(\vec{c}) = \bf{A}^T\bf{A} \]
+</p>
+<p> where the \(m\)-th row of \(\bf{A}\) is </p><p class="formulaDsp">
+\[ A_m = \sum_{i\in G_m}\frac{\partial l(y_i,\vec{x}_i,\vec{c})}{\partial 
\vec{c}} \]
+</p>
+<p> where \(G_m\) is the set of rows that belong to the same cluster.</p>
+<p>We can compute the contributions to \(B\) and \(A\) for each cluster during 
one scan through the data table in an aggregate function. These are then summed 
over all clusters, outside of the aggregate function, to obtain the full \(B\) 
and \(A\). Finally, the matrix multiplications are done in a separate function 
on the master node.</p>
+<p>When multinomial logistic regression is computed before the multinomial 
clustered variance calculation, it uses a default reference category of zero 
and the regression coefficients are included in the output table. The 
regression coefficients in the output are in the same order as in the multinomial 
logistic regression function, which is described below. For a problem with \( K 
\) independent variables \( (1, ..., K) \) and \( J \) categories \( (0, ..., 
J-1) \), let \( {m_{k,j}} \) denote the coefficient for independent variable \( k 
\) and category \( j \). The output is \( {m_{k_1, j_0}, m_{k_1, j_1} \ldots 
m_{k_1, j_{J-1}}, m_{k_2, j_0}, m_{k_2, j_1} \ldots m_{k_K, j_{J-1}}} \). The 
order is NOT CONSISTENT with the multinomial regression marginal effect 
calculation with function <em>marginal_mlogregr</em>. This is deliberate 
because the interfaces of all multinomial regressions (robust, clustered, ...) 
will be moved to match that used in marginal.</p>
+<p><a class="anchor" id="literature"></a></p><dl class="section 
user"><dt>Literature</dt><dd></dd></dl>
+<p>[1] Standard, Robust, and Clustered Standard Errors Computed in R, <a 
href="http://diffuseprior.wordpress.com/2012/06/15/standard-robust-and-clustered-standard-errors-computed-in-r/";>http://diffuseprior.wordpress.com/2012/06/15/standard-robust-and-clustered-standard-errors-computed-in-r/</a></p>
+<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related 
Topics</dt><dd>File <a class="el" 
href="clustered__variance_8sql__in.html">clustered_variance.sql_in</a> 
documenting the clustered variance SQL functions.</dd></dl>
+<p>File <a class="el" href="clustered__variance__coxph_8sql__in.html" 
title="SQL functions for clustered robust cox proportional hazards regression. 
">clustered_variance_coxph.sql_in</a> documenting the clustered variance for 
Cox proportional hazards SQL functions.</p>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Mon Oct 15 2018 11:24:30 for MADlib by
+    <a href="http://www.doxygen.org/index.html";>
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.14 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/af0e5f14/docs/v1.15.1/group__grp__clustering.html
----------------------------------------------------------------------
diff --git a/docs/v1.15.1/group__grp__clustering.html 
b/docs/v1.15.1/group__grp__clustering.html
new file mode 100644
index 0000000..b6cb8a6
--- /dev/null
+++ b/docs/v1.15.1/group__grp__clustering.html
@@ -0,0 +1,146 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";>
+<html xmlns="http://www.w3.org/1999/xhtml";>
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.14"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data 
mining,deep learning,ensemble methods,data science,market basket 
analysis,affinity analysis,pca,lda,regression,elastic net,huber 
white,proportional hazards,k-means,latent dirichlet allocation,bayes,support 
vector machines,svm"/>
+<title>MADlib: Clustering</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+  $(document).ready(initResizable);
+/* @license-end */</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+  $(document).ready(function() { init_search(); });
+/* @license-end */
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" async 
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/MathJax.js";></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org";><img alt="Logo" 
src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ ></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.15.1</span>
+   </div>
+   <div id="projectbrief">User Documentation for Apache MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" 
href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" 
border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.14 -->
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+/* @license-end */
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+$(document).ready(function(){initNavTree('group__grp__clustering.html','');});
+/* @license-end */
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="summary">
+<a href="#groups">Modules</a>  </div>
+  <div class="headertitle">
+<div class="title">Clustering<div class="ingroups"><a class="el" 
href="group__grp__unsupervised.html">Unsupervised Learning</a></div></div>  
</div>
+</div><!--header-->
+<div class="contents">
+<a name="details" id="details"></a><h2 class="groupheader">Detailed 
Description</h2>
+<p>Methods for clustering data. </p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a 
name="groups"></a>
+Modules</h2></td></tr>
+<tr class="memitem:group__grp__kmeans"><td class="memItemLeft" align="right" 
valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" 
href="group__grp__kmeans.html">k-Means Clustering</a></td></tr>
+<tr class="memdesc:group__grp__kmeans"><td class="mdescLeft">&#160;</td><td 
class="mdescRight">Partitions a set of observations into clusters by finding 
centroids that minimize the sum of observations' distances from their closest 
centroid. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Mon Oct 15 2018 11:24:30 for MADlib by
+    <a href="http://www.doxygen.org/index.html";>
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.14 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/af0e5f14/docs/v1.15.1/group__grp__clustering.js
----------------------------------------------------------------------
diff --git a/docs/v1.15.1/group__grp__clustering.js 
b/docs/v1.15.1/group__grp__clustering.js
new file mode 100644
index 0000000..61858da
--- /dev/null
+++ b/docs/v1.15.1/group__grp__clustering.js
@@ -0,0 +1,4 @@
+var group__grp__clustering =
+[
+    [ "k-Means Clustering", "group__grp__kmeans.html", null ]
+];
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/af0e5f14/docs/v1.15.1/group__grp__cols2vec.html
----------------------------------------------------------------------
diff --git a/docs/v1.15.1/group__grp__cols2vec.html 
b/docs/v1.15.1/group__grp__cols2vec.html
new file mode 100644
index 0000000..48712be
--- /dev/null
+++ b/docs/v1.15.1/group__grp__cols2vec.html
@@ -0,0 +1,407 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.14"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data 
mining,deep learning,ensemble methods,data science,market basket 
analysis,affinity analysis,pca,lda,regression,elastic net,huber 
white,proportional hazards,k-means,latent dirichlet allocation,bayes,support 
vector machines,svm"/>
+<title>MADlib: Columns to Vector</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+  $(document).ready(initResizable);
+/* @license-end */</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+  $(document).ready(function() { init_search(); });
+/* @license-end */
+</script>
+<script type="text/x-mathjax-config">
+  MathJax.Hub.Config({
+    extensions: ["tex2jax.js", "TeX/AMSmath.js", "TeX/AMSsymbols.js"],
+    jax: ["input/TeX","output/HTML-CSS"],
+});
+</script><script type="text/javascript" async 
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/MathJax.js"></script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.apache.org"><img alt="Logo" 
src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.15.1</span>
+   </div>
+   <div id="projectbrief">User Documentation for Apache MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" 
href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" 
border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.14 -->
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+/* @license-end */
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+/* @license 
magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt 
GPL-v2 */
+$(document).ready(function(){initNavTree('group__grp__cols2vec.html','');});
+/* @license-end */
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Columns to Vector<div class="ingroups"><a class="el" 
href="group__grp__other__functions.html">Utilities</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li class="level1">
+<a href="#cols2vec_syntax">Syntax</a> </li>
+<li class="level1">
+<a href="#cols2vec_usage">Usage</a> </li>
+<li class="level1">
+<a href="#cols2vec_example">Examples</a> </li>
+</ul>
+</div><dl class="section user"><dt>About</dt><dd>Convert feature columns in a 
table into an array in a single column.</dd></dl>
+<p>Given a table with a number of feature columns, this function will create 
an output table that contains the feature columns in an array. A summary table 
will also be created that contains the names of the features combined into the 
array, so that this process can be reversed using the function vec2cols.</p>
+<p><a class="anchor" id="cols2vec_usage"></a></p><dl class="section 
user"><dt>Usage</dt><dd></dd></dl>
+<pre class="syntax">
+cols2vec(
+    source_table,
+    output_table,
+    list_of_features,
+    list_of_features_to_exclude,
+    cols_to_output
+)
+</pre><p><b>Arguments</b> </p><dl class="arglist">
+<dt>source_table </dt>
+<dd><p class="startdd">TEXT. Name of the table containing the source data.</p>
+<p class="enddd"></p>
+</dd>
+<dt>output_table </dt>
+<dd><p class="startdd">TEXT. Name of the generated table containing the 
output.</p>
+<p class="enddd"></p>
+</dd>
+<dt>list_of_features </dt>
+<dd><p class="startdd">TEXT. Comma-separated string of column names or 
expressions to put into feature array. Can also be '*' implying all columns are 
to be put into feature array (except for the ones included in the next argument 
that lists exclusions). Type casting will be done as per the regular type 
casting rules of the underlying database. Array columns in the source table are 
not supported in the 'list_of_features' parameter. Also, all of the features to 
be included must be of the same type and must not have null values.</p>
+<p class="enddd"></p>
+</dd>
+<dt>list_of_features_to_exclude (optional) </dt>
+<dd><p class="startdd">TEXT. Default NULL. Comma-separated string of column 
names to exclude from the feature array. Typically used when 'list_of_features' 
is set to '*'.</p>
+<p class="enddd"></p>
+</dd>
+<dt>cols_to_output (optional) </dt>
+<dd>TEXT. Default NULL. Comma-separated string of column names from the source 
table to keep in the output table, in addition to the feature array. To keep 
all columns from the source table, use '*' for this parameter.  </dd>
+</dl>
+<p><b>Output table</b> <br />
+ The output table produced by the cols2vec function contains the following 
columns: </p><table class="output">
+<tr>
+<th>&lt;...&gt; </th><td>Columns from source table, depending on which ones 
are kept (if any).   </td></tr>
+<tr>
+<th>feature_vector </th><td>Column that contains the feature array.  </td></tr>
+</table>
+<p><b>Output summary table</b> <br />
+ A summary table named <em>&lt;output_table&gt;_summary</em> is also created 
that contains: </p><table class="output">
+<tr>
+<th>source_table </th><td>Name of the table containing the source data.  
</td></tr>
+<tr>
+<th>list_of_features </th><td>List of features to put in vector.  </td></tr>
+<tr>
+<th>list_of_features_to_exclude </th><td>Features specified by the user to 
exclude from 'list_of_features'.  </td></tr>
+<tr>
+<th>feature_names </th><td>Names of the features that were nested (converted 
to a vector) in the output table.  </td></tr>
+</table>
+<p><a class="anchor" id="cols2vec_example"></a></p><dl class="section 
user"><dt>Examples</dt><dd></dd></dl>
+<ol type="1">
+<li>Load sample data: <pre class="example">
+DROP TABLE IF EXISTS golf CASCADE;
+CREATE TABLE golf (
+    id integer NOT NULL,
+    "OUTLOOK" text,
+    temperature double precision,
+    humidity double precision,
+    "Temp_Humidity" double precision[],
+    clouds_airquality text[],
+    windy boolean,
+    class text,
+    observation_weight double precision
+);
+INSERT INTO golf VALUES
+(1,'sunny', 85, 85, ARRAY[85, 85],ARRAY['none', 'unhealthy'], 'false','Don''t 
Play', 5.0),
+(2, 'sunny', 80, 90, ARRAY[80, 90], ARRAY['none', 'moderate'], 'true', 'Don''t 
Play', 5.0),
+(3, 'overcast', 83, 78, ARRAY[83, 78], ARRAY['low', 'moderate'], 'false', 
'Play', 1.5),
+(4, 'rain', 70, 96, ARRAY[70, 96], ARRAY['low', 'moderate'], 'false', 'Play', 
1.0),
+(5, 'rain', 68, 80, ARRAY[68, 80], ARRAY['medium', 'good'], 'false', 'Play', 
1.0),
+(6, 'rain', 65, 70, ARRAY[65, 70], ARRAY['low', 'unhealthy'], 'true', 'Don''t 
Play', 1.0),
+(7, 'overcast', 64, 65, ARRAY[64, 65], ARRAY['medium', 'moderate'], 'true', 
'Play', 1.5),
+(8, 'sunny', 72, 95, ARRAY[72, 95], ARRAY['high', 'unhealthy'], 'false', 
'Don''t Play', 5.0),
+(9, 'sunny', 69, 70, ARRAY[69, 70], ARRAY['high', 'good'], 'false', 'Play', 
5.0),
+(10, 'rain', 75, 80, ARRAY[75, 80], ARRAY['medium', 'good'], 'false', 'Play', 
1.0),
+(11, 'sunny', 75, 70, ARRAY[75, 70], ARRAY['none', 'good'], 'true', 'Play', 
5.0),
+(12, 'overcast', 72, 90, ARRAY[72, 90], ARRAY['medium', 'moderate'], 'true', 
'Play', 1.5),
+(13, 'overcast', 81, 75, ARRAY[81, 75], ARRAY['medium', 'moderate'], 'false', 
'Play', 1.5),
+(14, 'rain', 71, 80, ARRAY[71, 80], ARRAY['low', 'unhealthy'], 'true', 'Don''t 
Play', 1.0);
+</pre></li>
+<li>Run cols2vec to combine the temperature and humidity columns into a single 
array feature. <pre class="example">
+DROP TABLE IF EXISTS cols2vec_result, cols2vec_result_summary;
+SELECT madlib.cols2vec(
+    'golf',
+    'cols2vec_result',
+    'temperature, humidity'
+);
+SELECT * FROM cols2vec_result;
+</pre> <pre class="result">
+ feature_vector
+----------------+
+ {85,85}
+ {80,90}
+ {83,78}
+ {70,96}
+ {68,80}
+ {65,70}
+ {64,65}
+ {72,95}
+ {69,70}
+ {75,80}
+ {75,70}
+ {72,90}
+ {81,75}
+ {71,80}
+(14 rows)
+</pre> View the summary table: <pre class="example">
+\x on
+SELECT * FROM cols2vec_result_summary;
+\x off
+</pre> <pre class="result">
+-[ RECORD 1 
]---------------+----------------------------------------------------------------
+source_table                | golf
+list_of_features            | temperature, humidity
+list_of_features_to_exclude | None
+feature_names               | {temperature,humidity}
+</pre></li>
+<li>Combine the temperature and humidity columns and keep 2 other columns from 
source_table. <pre class="example">
+DROP TABLE IF EXISTS cols2vec_result, cols2vec_result_summary;
+SELECT madlib.cols2vec(
+    'golf',
+    'cols2vec_result',
+    'temperature, humidity',
+    NULL,
+    'id, "OUTLOOK"'
+);
+SELECT * FROM cols2vec_result ORDER BY id;
+</pre> <pre class="result">
+ id | OUTLOOK  | feature_vector
+----+----------+----------------
+  1 | sunny    | {85,85}
+  2 | sunny    | {80,90}
+  3 | overcast | {83,78}
+  4 | rain     | {70,96}
+  5 | rain     | {68,80}
+  6 | rain     | {65,70}
+  7 | overcast | {64,65}
+  8 | sunny    | {72,95}
+  9 | sunny    | {69,70}
+ 10 | rain     | {75,80}
+ 11 | sunny    | {75,70}
+ 12 | overcast | {72,90}
+ 13 | overcast | {81,75}
+ 14 | rain     | {71,80}
+(14 rows)
+</pre> View the summary table: <pre class="example">
+\x on
+SELECT * FROM cols2vec_result_summary;
+\x off
+</pre> <pre class="result">
+-[ RECORD 1 
]---------------+----------------------------------------------------------------
+source_table                | golf
+list_of_features            | temperature, humidity
+list_of_features_to_exclude | None
+feature_names               | {temperature,humidity}
+</pre></li>
+<li>Combine all columns, excluding all columns that are not of type double 
precision. <pre class="example">
+DROP TABLE IF EXISTS cols2vec_result, cols2vec_result_summary;
+SELECT madlib.cols2vec(
+    'golf',
+    'cols2vec_result',
+    '*',
+    '"OUTLOOK", "Temp_Humidity", clouds_airquality, windy, class, id',
+    'id, "OUTLOOK"'
+);
+SELECT * FROM cols2vec_result ORDER BY id;
+</pre> <pre class="result">
+ id | OUTLOOK  | feature_vector
+----+----------+----------------
+  1 | sunny    | {85,85,5}
+  2 | sunny    | {80,90,5}
+  3 | overcast | {83,78,1.5}
+  4 | rain     | {70,96,1}
+  5 | rain     | {68,80,1}
+  6 | rain     | {65,70,1}
+  7 | overcast | {64,65,1.5}
+  8 | sunny    | {72,95,5}
+  9 | sunny    | {69,70,5}
+ 10 | rain     | {75,80,1}
+ 11 | sunny    | {75,70,5}
+ 12 | overcast | {72,90,1.5}
+ 13 | overcast | {81,75,1.5}
+ 14 | rain     | {71,80,1}
+(14 rows)
+</pre> View the summary table: <pre class="example">
+\x on
+SELECT * FROM cols2vec_result_summary;
+\x off
+</pre> <pre class="result">
+-[ RECORD 1 
]---------------+----------------------------------------------------------------
+source_table                | golf
+list_of_features            | *
+list_of_features_to_exclude | "OUTLOOK", "Temp_Humidity", clouds_airquality, 
windy, class, id
+feature_names               | {temperature,humidity,observation_weight}
+</pre></li>
+<li>Combine the temperature and humidity columns, exclude windy, and keep all 
of the columns from the source table. <pre class="example">
+DROP TABLE IF EXISTS cols2vec_result, cols2vec_result_summary;
+SELECT madlib.cols2vec(
+    'golf',
+    'cols2vec_result',
+    'windy, temperature, humidity',
+    'windy',
+    '*'
+);
+SELECT * FROM cols2vec_result ORDER BY id;
+</pre> <pre class="result">
+ id | OUTLOOK  | temperature | humidity | Temp_Humidity | clouds_airquality | 
windy |   class    | observation_weight | feature_vector
+----+----------+-------------+----------+---------------+-------------------+-------+------------+--------------------+----------------
+  1 | sunny    |          85 |       85 | {85,85}       | {none,unhealthy}  | 
f     | Don't Play |                  5 | {85,85}
+  2 | sunny    |          80 |       90 | {80,90}       | {none,moderate}   | 
t     | Don't Play |                  5 | {80,90}
+  3 | overcast |          83 |       78 | {83,78}       | {low,moderate}    | 
f     | Play       |                1.5 | {83,78}
+  4 | rain     |          70 |       96 | {70,96}       | {low,moderate}    | 
f     | Play       |                  1 | {70,96}
+  5 | rain     |          68 |       80 | {68,80}       | {medium,good}     | 
f     | Play       |                  1 | {68,80}
+  6 | rain     |          65 |       70 | {65,70}       | {low,unhealthy}   | 
t     | Don't Play |                  1 | {65,70}
+  7 | overcast |          64 |       65 | {64,65}       | {medium,moderate} | 
t     | Play       |                1.5 | {64,65}
+  8 | sunny    |          72 |       95 | {72,95}       | {high,unhealthy}  | 
f     | Don't Play |                  5 | {72,95}
+  9 | sunny    |          69 |       70 | {69,70}       | {high,good}       | 
f     | Play       |                  5 | {69,70}
+ 10 | rain     |          75 |       80 | {75,80}       | {medium,good}     | 
f     | Play       |                  1 | {75,80}
+ 11 | sunny    |          75 |       70 | {75,70}       | {none,good}       | 
t     | Play       |                  5 | {75,70}
+ 12 | overcast |          72 |       90 | {72,90}       | {medium,moderate} | 
t     | Play       |                1.5 | {72,90}
+ 13 | overcast |          81 |       75 | {81,75}       | {medium,moderate} | 
f     | Play       |                1.5 | {81,75}
+ 14 | rain     |          71 |       80 | {71,80}       | {low,unhealthy}   | 
t     | Don't Play |                  1 | {71,80}
+(14 rows)
+</pre> View the summary table: <pre class="example">
+\x on
+SELECT * FROM cols2vec_result_summary;
+\x off
+</pre> <pre class="result">
+-[ RECORD 1 ]---------------+-----------------------------
+source_table                | golf
+list_of_features            | windy, temperature, humidity
+list_of_features_to_exclude | windy
+feature_names               | {temperature,humidity}
+</pre> This also shows that you can exclude features in 
'list_of_features_to_exclude' that are in the list of 'list_of_features'. This 
can be useful if the 'list_of_features' is generated from an expression or 
subquery.</li>
+<li>Type casting works as per regular rules of the underlying database. E.g., 
combining integer and double precision columns will create a double precision 
feature vector. For Boolean, do an explicit cast to the target type: <pre 
class="example">
+DROP TABLE IF EXISTS cols2vec_result, cols2vec_result_summary;
+SELECT madlib.cols2vec(
+    'golf',
+    'cols2vec_result',
+    'windy::TEXT, class',
+    NULL,
+    'id'
+);
+SELECT * FROM cols2vec_result ORDER BY id;
+</pre> <pre class="result">
+ id |    feature_vector
+----+----------------------
+  1 | {false,"Don't Play"}
+  2 | {true,"Don't Play"}
+  3 | {false,Play}
+  4 | {false,Play}
+  5 | {false,Play}
+  6 | {true,"Don't Play"}
+  7 | {true,Play}
+  8 | {false,"Don't Play"}
+  9 | {false,Play}
+ 10 | {false,Play}
+ 11 | {true,Play}
+ 12 | {true,Play}
+ 13 | {false,Play}
+ 14 | {true,"Don't Play"}
+(14 rows)
+</pre> </li>
+</ol>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Mon Oct 15 2018 11:24:30 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.14 </li>
+  </ul>
+</div>
+</body>
+</html>

[28/51] [partial] madlib-site git commit: Doc: Add v1.15.1 documentation

Reply via email to