[16/51] [partial] incubator-madlib-site git commit: Add v1.11 docs

riyer Tue, 16 May 2017 13:30:16 -0700

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/b5b51c69/docs/v1.11/group__grp__sssp.html
----------------------------------------------------------------------
diff --git a/docs/v1.11/group__grp__sssp.html b/docs/v1.11/group__grp__sssp.html
new file mode 100644
index 0000000..27e6008
--- /dev/null
+++ b/docs/v1.11/group__grp__sssp.html
@@ -0,0 +1,357 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";>
+<html xmlns="http://www.w3.org/1999/xhtml";>
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data 
mining,deep learning,ensemble methods,data science,market basket 
analysis,affinity analysis,pca,lda,regression,elastic net,huber 
white,proportional hazards,k-means,latent dirichlet allocation,bayes,support 
vector machines,svm"/>
+<title>MADlib: Single Source Shortest Path</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.incubator.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.incubator.apache.org";><img 
alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ 
></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.11</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" 
href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" 
border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__sssp.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Single Source Shortest Path<div class="ingroups"><a 
class="el" href="group__grp__graph.html">Graph</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li>
+<a href="#sssp">SSSP</a> </li>
+<li>
+<a href="#notes">Notes</a> </li>
+<li>
+<a href="#examples">Examples</a> </li>
+<li>
+<a href="#literature">Literature</a> </li>
+</ul>
+</div><p>Given a graph and a source vertex, the single source shortest path 
(SSSP) algorithm finds a path from the source vertex to every other vertex in 
the graph, such that the sum of the weights of the path edges is minimized.</p>
+<p><a class="anchor" id="sssp"></a></p><dl class="section 
user"><dt>SSSP</dt><dd><pre class="syntax">
+graph_sssp( vertex_table,
+            vertex_id,
+            edge_table,
+            edge_args,
+            source_vertex,
+            out_table,
+            grouping_cols
+          )
+</pre></dd></dl>
+<p><b>Arguments</b> </p><dl class="arglist">
+<dt>vertex_table </dt>
+<dd><p class="startdd">TEXT. Name of the table containing the vertex data for 
the graph. Must contain the column specified in the 'vertex_id' parameter 
below.</p>
+<p class="enddd"></p>
+</dd>
+<dt>vertex_id </dt>
+<dd><p class="startdd">TEXT, default = 'id'. Name of the column in 
'vertex_table' containing vertex ids. The vertex ids are of type INTEGER with 
no duplicates. They do not need to be contiguous.</p>
+<p class="enddd"></p>
+</dd>
+<dt>edge_table </dt>
+<dd><p class="startdd">TEXT. Name of the table containing the edge data. The 
edge table must contain columns for source vertex, destination vertex and edge 
weight. Column naming convention is described below in the 'edge_args' 
parameter.</p>
+<p class="enddd"></p>
+</dd>
+<dt>edge_args </dt>
+<dd><p class="startdd">TEXT. A comma-delimited string containing multiple 
named arguments of the form "name=value". The following parameters are 
supported for this string argument:</p><ul>
+<li>src (INTEGER): Name of the column containing the source vertex ids in the 
edge table. Default column name is 'src'.</li>
+<li>dest (INTEGER): Name of the column containing the destination vertex ids 
in the edge table. Default column name is 'dest'.</li>
+<li>weight (FLOAT8): Name of the column containing the edge weights in the 
edge table. Default column name is 'weight'.</li>
+</ul>
+<p class="enddd"></p>
+</dd>
+<dt>source_vertex </dt>
+<dd><p class="startdd">INTEGER. The source vertex id from which the algorithm 
starts. This vertex id must exist in the 'vertex_id' column of 
'vertex_table'.</p>
+<p class="enddd"></p>
+</dd>
+<dt>out_table </dt>
+<dd><p class="startdd">TEXT. Name of the table to store the result of SSSP. It 
contains a row for every vertex of every group and has the following columns 
(in addition to the grouping columns):</p><ul>
+<li>vertex_id : The id for the destination. Will use the input parameter 
'vertex_id' for column naming.</li>
+<li>weight : The total weight of the shortest path from the source vertex to 
this particular vertex. Will use the input parameter 'weight' for column 
naming.</li>
+<li>parent : The parent of this vertex in the shortest path from source. Will 
use 'parent' for column naming.</li>
+</ul>
+<p>A summary table named &lt;out_table&gt;_summary is also created. This is an 
internal table that keeps a record of the input parameters and is used by the 
path function described below. </p>
+<p class="enddd"></p>
+</dd>
+<dt>grouping_cols </dt>
+<dd>TEXT, default = NULL. List of columns used to group the input into 
discrete subgraphs. These columns must exist in the edge table. When this value 
is null, no grouping is used and a single SSSP result is generated.  </dd>
+</dl>
+<dl class="section user"><dt>Path Retrieval</dt><dd></dd></dl>
+<p>The path retrieval function returns the shortest path from the source 
vertex to a specified destination vertex.</p>
+<pre class="syntax">
+graph_sssp_get_path( sssp_table,
+                     dest_vertex,
+                     path_table
+                    )
+</pre><p><b>Arguments</b> </p><dl class="arglist">
+<dt>sssp_table </dt>
+<dd><p class="startdd">TEXT. Name of the table that contains the SSSP 
output.</p>
+<p class="enddd"></p>
+</dd>
+<dt>dest_vertex </dt>
+<dd><p class="startdd">INTEGER. The vertex that will be the destination of the 
desired path.</p>
+<p class="enddd"></p>
+</dd>
+<dt>path_table </dt>
+<dd><p class="startdd">TEXT. Name of the output table that contains the path. 
It contains a row for every group and has the following columns:</p><ul>
+<li>grouping_cols : The grouping columns given in the creation of the SSSP 
table. If there are no grouping columns, these columns will not exist and the 
table will have a single row.</li>
+<li>path (ARRAY) : The shortest path from the source vertex (as specified in 
the SSSP execution) to the destination vertex. </li>
+</ul>
+<p class="enddd"></p>
+</dd>
+</dl>
+<p><a class="anchor" id="notes"></a></p><dl class="section 
user"><dt>Notes</dt><dd></dd></dl>
+<p>The Bellman-Ford algorithm [1] is used to implement SSSP. This algorithm 
allows negative edges but not negative cycles. In the case of graphs with 
negative cycles, an error will be given and no output table will be 
generated.</p>
+<p>Also see the Grail project [2] for more background on graph analytics 
processing in relational databases.</p>
+<p><a class="anchor" id="examples"></a></p><dl class="section 
user"><dt>Examples</dt><dd></dd></dl>
+<ol type="1">
+<li>Create vertex and edge tables to represent the graph: <pre class="syntax">
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+        id INTEGER
+        );
+CREATE TABLE edge(
+        src INTEGER,
+        dest INTEGER,
+        weight FLOAT8
+        );
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6),
+(7);
+INSERT INTO edge VALUES
+(0, 1, 1.0),
+(0, 2, 1.0),
+(0, 4, 10.0),
+(1, 2, 2.0),
+(1, 3, 10.0),
+(2, 3, 1.0),
+(2, 5, 1.0),
+(2, 6, 3.0),
+(3, 0, 1.0),
+(4, 0, -2.0),
+(5, 6, 1.0),
+(6, 7, 1.0);
+</pre></li>
+<li>Calculate the shortest paths from vertex 0: <pre class="syntax">
+DROP TABLE IF EXISTS out, out_summary;
+SELECT madlib.graph_sssp(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use 
default naming)
+                         'edge',        -- Edge table
+                         NULL,          -- Edge arguments (NULL means use 
default naming)
+                         0,             -- Source vertex for path calculation
+                         'out');        -- Output table of shortest paths
+SELECT * FROM out ORDER BY id;
+</pre> <pre class="result">
+ id | weight | parent
+----+--------+--------
+  0 |      0 |      0
+  1 |      1 |      0
+  2 |      1 |      0
+  3 |      2 |      2
+  4 |     10 |      0
+  5 |      2 |      2
+  6 |      3 |      5
+  7 |      4 |      6
+(8 rows)
+</pre></li>
+<li>Get the shortest path to vertex 5: <pre class="syntax">
+DROP TABLE IF EXISTS out_path;
+SELECT madlib.graph_sssp_get_path('out',5,'out_path');
+SELECT * FROM out_path;
+</pre> <pre class="result">
+  path
+---------
+ {0,2,5}
+</pre></li>
+<li>Now let's do a similar example except using different column names in the 
tables (i.e., not the defaults). Create the vertex and edge tables: <pre 
class="syntax">
+DROP TABLE IF EXISTS vertex_alt, edge_alt;
+CREATE TABLE vertex_alt AS SELECT id AS v_id FROM vertex;
+CREATE TABLE edge_alt AS SELECT src AS e_src, dest, weight AS e_weight FROM 
edge;
+</pre></li>
+<li>Get the shortest path from vertex 1: <pre class="syntax">
+DROP TABLE IF EXISTS out_alt, out_alt_summary;
+SELECT madlib.graph_sssp(
+                         'vertex_alt',                  -- Vertex table
+                         'v_id',                        -- Vertex id column 
(NULL means use default naming)
+                         'edge_alt',                    -- Edge table
+                         'src=e_src, weight=e_weight',  -- Edge arguments 
(NULL means use default naming)
+                         1,                             -- Source vertex for 
path calculation
+                         'out_alt');                    -- Output table of 
shortest paths
+SELECT * FROM out_alt ORDER BY v_id;
+</pre> <pre class="result">
+ v_id | e_weight | parent
+------+----------+--------
+    0 |        4 |      3
+    1 |        0 |      1
+    2 |        2 |      1
+    3 |        3 |      2
+    4 |       14 |      0
+    5 |        3 |      2
+    6 |        4 |      5
+    7 |        5 |      6
+(8 rows)
+</pre></li>
+<li>Create a graph with 2 groups: <pre class="syntax">
+DROP TABLE IF EXISTS edge_gr;
+CREATE TABLE edge_gr AS
+(
+  SELECT *, 0 AS grp FROM edge
+  UNION
+  SELECT *, 1 AS grp FROM edge WHERE src &lt; 6 AND dest &lt; 6
+);
+INSERT INTO edge_gr VALUES
+(4,5,-20,1);
+</pre></li>
+<li>Find SSSP for all groups: <pre class="syntax">
+DROP TABLE IF EXISTS out_gr, out_gr_summary;
+SELECT madlib.graph_sssp(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use 
default naming)
+                         'edge_gr',     -- Edge table
+                         NULL,          -- Edge arguments (NULL means use 
default naming)
+                         0,             -- Source vertex for path calculation
+                         'out_gr',      -- Output table of shortest paths
+                         'grp'          -- Grouping columns
+);
+SELECT * FROM out_gr ORDER BY grp,id;
+</pre> <pre class="result">
+ grp | id | weight | parent
+-----+----+--------+--------
+   0 |  0 |      0 |      0
+   0 |  1 |      1 |      0
+   0 |  2 |      1 |      0
+   0 |  3 |      2 |      2
+   0 |  4 |     10 |      0
+   0 |  5 |      2 |      2
+   0 |  6 |      3 |      5
+   0 |  7 |      4 |      6
+   1 |  0 |      0 |      0
+   1 |  1 |      1 |      0
+   1 |  2 |      1 |      0
+   1 |  3 |      2 |      2
+   1 |  4 |     10 |      0
+   1 |  5 |    -10 |      4
+</pre></li>
+<li>Find the path to vertex 5 in every group: <pre class="syntax">
+DROP TABLE IF EXISTS out_gr_path;
+SELECT madlib.graph_sssp_get_path('out_gr',5,'out_gr_path');
+SELECT * FROM out_gr_path ORDER BY grp;
+</pre> <pre class="result">
+ grp |  path
+-----+---------
+   0 | {0,2,5}
+   1 | {0,4,5}
+</pre></li>
+</ol>
+<p><a class="anchor" id="literature"></a></p><dl class="section 
user"><dt>Literature</dt><dd></dd></dl>
+<p>[1] Bellman&ndash;Ford algorithm. <a 
href="https://en.wikipedia.org/wiki/Bellman%E2%80%93Ford_algorithm">https://en.wikipedia.org/wiki/Bellman%E2%80%93Ford_algorithm</a></p>
+<p>[2] The case against specialized graph analytics engines, J. Fan, G. Soosai 
Raj, and J. M. Patel. CIDR 2015. <a 
href="http://cidrdb.org/cidr2015/Papers/CIDR15_Paper20.pdf">http://cidrdb.org/cidr2015/Papers/CIDR15_Paper20.pdf</a>
 </p>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Tue May 16 2017 13:24:38 for MADlib by
+    <a href="http://www.doxygen.org/index.html";>
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>


http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/b5b51c69/docs/v1.11/group__grp__stats.html
----------------------------------------------------------------------
diff --git a/docs/v1.11/group__grp__stats.html 
b/docs/v1.11/group__grp__stats.html
new file mode 100644
index 0000000..3bc2ddf
--- /dev/null
+++ b/docs/v1.11/group__grp__stats.html
@@ -0,0 +1,137 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";>
+<html xmlns="http://www.w3.org/1999/xhtml";>
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data 
mining,deep learning,ensemble methods,data science,market basket 
analysis,affinity analysis,pca,lda,regression,elastic net,huber 
white,proportional hazards,k-means,latent dirichlet allocation,bayes,support 
vector machines,svm"/>
+<title>MADlib: Statistics</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.incubator.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.incubator.apache.org";><img 
alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ 
></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.11</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" 
href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" 
border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__stats.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="summary">
+<a href="#groups">Modules</a>  </div>
+  <div class="headertitle">
+<div class="title">Statistics</div>  </div>
+</div><!--header-->
+<div class="contents">
+<a name="details" id="details"></a><h2 class="groupheader">Detailed 
Description</h2>
+<p>Contains statistics modules </p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a 
name="groups"></a>
+Modules</h2></td></tr>
+<tr class="memitem:group__grp__desc__stats"><td class="memItemLeft" 
align="right" valign="top">&#160;</td><td class="memItemRight" 
valign="bottom"><a class="el" href="group__grp__desc__stats.html">Descriptive 
Statistics</a></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__inf__stats"><td class="memItemLeft" 
align="right" valign="top">&#160;</td><td class="memItemRight" 
valign="bottom"><a class="el" href="group__grp__inf__stats.html">Inferential 
Statistics</a></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__prob"><td class="memItemLeft" align="right" 
valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" 
href="group__grp__prob.html">Probability Functions</a></td></tr>
+<tr class="memdesc:group__grp__prob"><td class="mdescLeft">&#160;</td><td 
class="mdescRight">Provides cumulative distribution, density/mass, and quantile 
functions for a wide range of probability distributions. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Tue May 16 2017 13:24:38 for MADlib by
+    <a href="http://www.doxygen.org/index.html";>
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/b5b51c69/docs/v1.11/group__grp__stats.js
----------------------------------------------------------------------
diff --git a/docs/v1.11/group__grp__stats.js b/docs/v1.11/group__grp__stats.js
new file mode 100644
index 0000000..0828141
--- /dev/null
+++ b/docs/v1.11/group__grp__stats.js
@@ -0,0 +1,6 @@
+var group__grp__stats =
+[
+    [ "Descriptive Statistics", "group__grp__desc__stats.html", 
"group__grp__desc__stats" ],
+    [ "Inferential Statistics", "group__grp__inf__stats.html", 
"group__grp__inf__stats" ],
+    [ "Probability Functions", "group__grp__prob.html", null ]
+];
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/b5b51c69/docs/v1.11/group__grp__stats__tests.html
----------------------------------------------------------------------
diff --git a/docs/v1.11/group__grp__stats__tests.html 
b/docs/v1.11/group__grp__stats__tests.html
new file mode 100644
index 0000000..43857ee
--- /dev/null
+++ b/docs/v1.11/group__grp__stats__tests.html
@@ -0,0 +1,535 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data 
mining,deep learning,ensemble methods,data science,market basket 
analysis,affinity analysis,pca,lda,regression,elastic net,huber 
white,proportional hazards,k-means,latent dirichlet allocation,bayes,support 
vector machines,svm"/>
+<title>MADlib: Hypothesis Tests</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.incubator.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img 
alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ 
></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.11</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" 
href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" 
border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__stats__tests.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Hypothesis Tests<div class="ingroups"><a class="el" 
href="group__grp__stats.html">Statistics</a> &raquo; <a class="el" 
href="group__grp__inf__stats.html">Inferential Statistics</a></div></div>  
</div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li>
+<a href="#input">Input</a> </li>
+<li>
+<a href="#usage">Usage</a> </li>
+<li>
+<a href="#examples">Examples</a> </li>
+<li>
+<a href="#literature">Literature</a> </li>
+<li>
+<a href="#related">Related Topics</a> </li>
+</ul>
+</div><p>Hypothesis tests are used to confirm or reject a <em>null 
hypothesis</em> <img class="formulaInl" alt="$ H_0 $" src="form_397.png"/> 
about the distribution of random variables, given realizations of these random 
variables. Since in general it is not possible to make statements with 
certainty, one is interested in the probability <img class="formulaInl" alt="$ 
p $" src="form_111.png"/> of seeing random variates at least as extreme as the 
ones observed, assuming that <img class="formulaInl" alt="$ H_0 $" 
src="form_397.png"/> is true. If this probability <img class="formulaInl" 
alt="$ p $" src="form_111.png"/> is small, <img class="formulaInl" alt="$ H_0 
$" src="form_397.png"/> will be rejected by the test with <em>significance 
level</em> <img class="formulaInl" alt="$ p $" src="form_111.png"/>. Falsifying 
<img class="formulaInl" alt="$ H_0 $" src="form_397.png"/> is the canonic goal 
when employing a hypothesis test. That is, hypothesis tests are typically used 
in order to substantiate that instead the <em>alternative hypothesis</em> <img 
class="formulaInl" alt="$ H_1 $" src="form_398.png"/> is true.</p>
+<p>Hypothesis tests may be divided into parametric and non-parametric tests. A 
parametric test assumes certain distributions and makes inferences about 
parameters of the distributions (e.g., the mean of a normal distribution). 
Formally, there is a given domain of possible parameters <img 
class="formulaInl" alt="$ \Gamma $" src="form_399.png"/> and the null 
hypothesis <img class="formulaInl" alt="$ H_0 $" src="form_397.png"/> is the 
event that the true parameter <img class="formulaInl" alt="$ \gamma_0 \in 
\Gamma_0 $" src="form_400.png"/>, where <img class="formulaInl" alt="$ \Gamma_0 
\subsetneq \Gamma $" src="form_401.png"/>. Non-parametric tests, on the other 
hand, do not assume any particular distribution of the sample (e.g., a 
non-parametric test may simply test if two distributions are similar).</p>
+<p>The first step of a hypothesis test is to compute a <em>test 
statistic</em>, which is a function of the random variates, i.e., a random 
variate itself. A hypothesis test relies on the distribution of the test 
statistic being (approximately) known. Now, the <img class="formulaInl" alt="$ 
p $" src="form_111.png"/>-value is the probability of seeing a test statistic 
at least as extreme as the one observed, assuming that <img class="formulaInl" 
alt="$ H_0 $" src="form_397.png"/> is true. In a case where the null hypothesis 
corresponds to a family of distributions (e.g., in a parametric test where <img 
class="formulaInl" alt="$ \Gamma_0 $" src="form_402.png"/> is not a singleton 
set), the <img class="formulaInl" alt="$ p $" src="form_111.png"/>-value is the 
supremum, over all possible distributions according to the null hypothesis, of 
these probabilities.</p>
+<dl class="section note"><dt>Note</dt><dd>Please refer to <a class="el" 
href="hypothesis__tests_8sql__in.html">hypothesis_tests.sql_in</a> for 
additional technical information on the MADlib implementation of hypothesis 
tests, and for detailed function signatures for all tests.</dd></dl>
+<p><a class="anchor" id="input"></a></p><dl class="section 
user"><dt>Input</dt><dd></dd></dl>
+<p>Input data is assumed to be normalized with all values stored row-wise. In 
general, the following inputs are expected.</p>
+<p><b>One-sample tests</b> expect the following form: </p><pre>{TABLE|VIEW} 
<em>source</em> (
+    ...
+    <em>value</em> DOUBLE PRECISION
+    ...
+)</pre><p><b>Two-sample tests</b> expect the following form: 
</p><pre>{TABLE|VIEW} <em>source</em> (
+    ...
+    <em>first</em> BOOLEAN,
+    <em>value</em> DOUBLE PRECISION
+    ...
+)</pre><p> The <code>first</code> column indicates whether a value is from the 
first sample (if <code>TRUE</code>) or the second sample (if 
<code>FALSE</code>).</p>
+<p><b>Many-sample tests</b> expect the following form: </p><pre>{TABLE|VIEW} 
<em>source</em> (
+    ...
+    <em>group</em> INTEGER,
+    <em>value</em> DOUBLE PRECISION
+    ...
+)</pre><p><a class="anchor" id="usage"></a></p><dl class="section 
user"><dt>Usage</dt><dd></dd></dl>
+<p>All tests are implemented as aggregate functions. The non-parametric 
(rank-based) tests are implemented as ordered aggregate functions and thus 
necessitate an <code>ORDER BY</code> clause. In the following, the most simple 
forms of usage are given. Specific function signatures, as described in <a 
class="el" href="hypothesis__tests_8sql__in.html">hypothesis_tests.sql_in</a>, 
may require more arguments or a different <code>ORDER BY</code> clause.</p>
+<ul>
+<li>Run a parametric one-sample test: <pre>SELECT 
<em>test</em>(<em>value</em>) FROM <em>source</em></pre> where '<em>test</em>' 
can be one of<ul>
+<li><code>t_test_one</code> (one-sample or dependent paired Student's 
t-test)</li>
+<li><code>chi2_gof_test</code> (Pearson's chi-squared goodness of fit test, 
also used for chi-squared independence test as shown in example section 
below)</li>
+</ul>
+</li>
+<li>Run a parametric two-sample/multi-sample test: <pre>SELECT 
<em>test</em>(<em>first/group</em>, <em>value</em>) FROM <em>source</em></pre> 
where '<em>test</em>' can be one of<ul>
+<li><code>f_test</code> (Fisher F-test)</li>
+<li><code>t_test_two_pooled</code> (two-sample pooled Student's t-test, i.e., 
equal variances)</li>
+<li><code>t_test_two_unpooled</code> (two-sample unpooled t-test, i.e., 
unequal variances, also known as Welch's t-test)</li>
+<li><code>one_way_anova</code> (one-way analysis of variance, 
multi-sample)</li>
+</ul>
+</li>
+<li><p class="startli">Run a non-parametric two-sample/multi-sample test: 
</p><pre>SELECT <em>test</em>(<em>first/group</em>, <em>value</em> ORDER BY 
<em>value</em>) FROM <em>source</em></pre><p> where '<em>test</em>' can be one 
of</p><ul>
+<li><code>ks_test</code> (Kolmogorov-Smirnov test)</li>
+<li><code>mw_test</code> (Mann-Whitney test)</li>
+<li><code>wsr_test</code> (Wilcoxon signed-rank test, multi-sample)</li>
+</ul>
+<p class="startli"><b>Note on non-parametric tests:</b> The Kolmogorov-Smirnov 
two-sample test is based on asymptotic theory. The p-value is given by 
comparing the test statistic with the Kolmogorov distribution. The p-value is 
also adjusted for data with heavy-tailed distributions, which may give different 
results than those given by R's ks.test function. See [3] for a detailed 
explanation. The literature is not unanimous about the definitions of the 
Wilcoxon rank sum and Mann-Whitney tests. There are two possible definitions 
for the statistic; MADlib outputs the minimum of the two and uses it for 
significance testing. This might give different results for both mw_test and 
wsr_test compared to statistical functions in other popular packages (like R's 
wilcox.test function). See [4] for a detailed explanation.</p>
+</li>
+</ul>
+<p><a class="anchor" id="examples"></a></p><dl class="section 
user"><dt>Examples</dt><dd></dd></dl>
+<ul>
+<li><b>One-sample and two-sample t-test</b> (data is a subset of mpg data from 
<a 
href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm">NIST/SEMATECH</a>)</li>
+</ul>
+<pre class="example">
+-- Load data
+DROP TABLE IF EXISTS auto83b;
+CREATE TABLE auto83b (
+    id SERIAL,
+    mpg_us DOUBLE PRECISION,
+    mpg_j DOUBLE PRECISION
+);
+COPY auto83b (mpg_us, mpg_j) FROM stdin DELIMITER '|';
+18|24
+15|27
+18|27
+16|25
+17|31
+15|35
+14|24
+14|19
+21|31
+10|32
+10|24
+11|26
+9| 9
+\N|32
+\N|37
+\N|38
+\N|34
+\N|34
+\N|32
+\N|33
+\N|32
+\N|25
+\N|24
+\N|37
+13|\N
+12|\N
+18|\N
+21|\N
+19|\N
+21|\N
+15|\N
+16|\N
+15|\N
+11|\N
+20|\N
+21|\N
+19|\N
+15|\N
+\.
+</pre><pre class="example">
+-- Create table for one sample tests
+DROP TABLE IF EXISTS auto83b_one_sample;
+CREATE TABLE auto83b_one_sample AS
+    SELECT mpg_us AS mpg
+    FROM auto83b
+    WHERE mpg_us is not NULL;
+-- Print table
+SELECT * FROM auto83b_one_sample;
+</pre><pre class="result">
+mpg 
+  18
+  15
+  18
+  16
+  17
+  15
+  14
+  14
+  21
+  10
+  10
+  11
+   9
+  13
+  12
+  18
+  21
+  19
+  21
+  15
+  16
+  15
+  11
+  20
+  21
+  19
+  15
+(27 rows)
+</pre> <pre class="example">
+-- Create table for two sample tests
+DROP TABLE IF EXISTS auto83b_two_sample;
+CREATE TABLE auto83b_two_sample AS
+SELECT TRUE AS is_us, mpg_us AS mpg
+    FROM auto83b
+    WHERE mpg_us is not NULL
+    UNION ALL
+    SELECT FALSE, mpg_j
+    FROM auto83b
+    WHERE mpg_j is not NULL;
+-- Print table
+SELECT * FROM auto83b_two_sample;
+</pre> <pre class="result">
+ is_us | mpg 
+-------+-----
+ t     |  18
+ t     |  15
+ t     |  18
+ t     |  16
+ t     |  17
+ t     |  15
+ t     |  14
+ t     |  14
+ t     |  21
+ t     |  10
+ t     |  10
+ t     |  11
+ t     |   9
+ t     |  13
+ t     |  12
+ t     |  18
+ t     |  21
+ t     |  19
+ t     |  21
+ t     |  15
+ t     |  16
+ t     |  15
+ t     |  11
+ t     |  20
+ t     |  21
+ t     |  19
+ t     |  15
+ f     |  24
+ f     |  27
+ f     |  27
+ f     |  25
+ f     |  31
+ f     |  35
+ f     |  24
+ f     |  19
+ f     |  31
+ f     |  32
+ f     |  24
+ f     |  26
+ f     |   9
+ f     |  32
+ f     |  37
+ f     |  38
+ f     |  34
+ f     |  34
+ f     |  32
+ f     |  33
+ f     |  32
+ f     |  25
+ f     |  24
+ f     |  37
+(51 rows)
+</pre> <pre class="example">
+-- One sample tests
+SELECT (madlib.t_test_one(mpg - 20)).* FROM auto83b_one_sample;  -- test 
rejected for mean = 20
+</pre><pre class="result">
+     statistic     | df | p_value_one_sided |  p_value_two_sided
+ ------------------+----+-------------------+----------------------
+  -6.0532478722666 | 26 | 0.999998926789141 | 2.14642171769697e-06
+ </pre><pre class="example">
+SELECT (madlib.t_test_one(mpg - 15.7)).* FROM auto83b_one_sample;  -- test not 
rejected
+</pre><pre class="result">
+       statistic      | df | p_value_one_sided | p_value_two_sided
+ ---------------------+----+-------------------+-------------------
+  0.00521831713126531 | 26 | 0.497938118950661 | 0.995876237901321
+</pre><pre class="example">
+-- Two sample tests
+SELECT (madlib.t_test_two_pooled(is_us, mpg)).* FROM auto83b_two_sample;
+</pre> <pre class="result">
+     statistic     | df | p_value_one_sided |  p_value_two_sided
+ -------------------+----+-------------------+----------------------
+  -8.89342267075968 | 49 | 0.999999999995748 | 8.50408632402377e-12
+ </pre><pre class="example">
+SELECT (madlib.t_test_two_unpooled(is_us, mpg)).* FROM auto83b_two_sample;
+</pre><pre class="result">
+      statistic     |        df        | p_value_one_sided |  p_value_two_sided
+ 
-------------------+------------------+-------------------+----------------------
+  -8.61746388524314 | 35.1283818346179 | 0.999999999821218 | 
3.57563867403599e-10
+</pre><ul>
+<li><b>F-Test</b> (Uses same data as above t-test)</li>
+</ul>
+<pre class="example">
+SELECT (madlib.f_test(is_us, mpg)).* FROM auto83b_two_sample;
+-- Test result indicates that the two distributions have different variances
+</pre> <pre class="result">
+      statistic     | df1 | df2 | p_value_one_sided |  p_value_two_sided
+ -------------------+-----+-----+-------------------+---------------------
+  0.311786921089247 |  26 |  23 | 0.997559863672441 | 0.00488027265511803
+</pre><ul>
+<li><b>Chi-squared goodness-of-fit test</b> (<a 
href="http://www.statsdirect.com/help/default.htm#nonparametric_methods/chisq_goodness_fit.htm">Data
 source</a>)</li>
+</ul>
+<pre class="example">
+CREATE TABLE chi2_test_blood_group (
+    id SERIAL,
+    blood_group VARCHAR,
+    observed BIGINT,
+    expected DOUBLE PRECISION
+);
+INSERT INTO chi2_test_blood_group(blood_group, observed, expected) VALUES
+    ('O', 67, 82.28),
+    ('A', 83, 84.15),
+    ('B', 29, 14.96),
+    ('AB', 8, 5.61);
+SELECT (madlib.chi2_gof_test(observed, expected)).* FROM chi2_test_blood_group;
+</pre> <pre class="result">
+     statistic     |       p_value        | df |       phi        | 
contingency_coef
+ 
------------------+----------------------+----+------------------+-------------------
+  17.0481013341976 | 0.000690824622923826 |  3 | 2.06446732440826 | 
0.899977280680593
+ </pre><ul>
+<li><b>Chi-squared independence test</b> (<a 
href="http://itl.nist.gov/div898/software/dataplot/refman1/auxillar/chistest.htm">Data
 source</a>)</li>
+</ul>
+<p>The Chi-squared independence test uses the Chi-squared goodness-of-fit 
function, as shown in the example below. The expected value needs to be 
computed and passed to the goodness-of-fit function. The expected value for 
MADlib is computed as <em>sum of rows * sum of columns</em>, for each element 
of the input matrix. For example, the expected value for element (2,1) would be 
<em>sum of row 2 * sum of column 1</em>.</p>
+<pre class="example">
+CREATE TABLE chi2_test_friendly (
+    id_x SERIAL,
+    values INTEGER[]
+);
+INSERT INTO chi2_test_friendly(values) VALUES
+    (array[5, 29, 14, 16]),
+    (array[15, 54, 14, 10]),
+    (array[20, 84, 17, 94]),
+    (array[68, 119, 26, 7]);</pre><pre class="example">-- Input table is 
expected to be unpivoted, so we need to unpivot it
+CREATE TABLE chi2_test_friendly_unpivoted AS
+SELECT id_x, id_y, values[id_y] AS observed
+FROM
+    chi2_test_friendly,
+    generate_series(1,4) AS id_y;</pre><pre class="example">-- Compute 
Chi-squared independence statistic, by calculating expected value in the SQL 
and calling the goodness-of-fit function
+SELECT (madlib.chi2_gof_test(observed, expected, deg_freedom)).*
+FROM (
+    -- Compute expected values and degrees of freedom
+    SELECT
+        observed,
+        sum(observed) OVER (PARTITION BY id_x)::DOUBLE PRECISION *
+        sum(observed) OVER (PARTITION BY id_y) AS expected
+    FROM chi2_test_friendly_unpivoted
+) p, (
+    SELECT
+        (count(DISTINCT id_x) - 1) * (count(DISTINCT id_y) - 1) AS deg_freedom
+    FROM chi2_test_friendly_unpivoted
+) q;
+</pre> <pre class="result">
+     statistic     |       p_value        | df |       phi        | 
contingency_coef
+ 
------------------+----------------------+----+------------------+-------------------
+  138.289841626008 | 2.32528678709871e-25 |  9 | 2.93991753313346 | 
0.946730727519112
+ </pre><ul>
+<li><b>ANOVA test</b> (<a 
href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc433.htm">Data 
source</a>)</li>
+</ul>
+<pre class="example">
+CREATE TABLE nist_anova_test (
+    id SERIAL,
+    resistance FLOAT8[]
+);
+INSERT INTO nist_anova_test(resistance) VALUES
+    (array[6.9,8.3,8.0]),
+    (array[5.4,6.8,10.5]),
+    (array[5.8,7.8,8.1]),
+    (array[4.6,9.2,6.9]),
+    (array[4.0,6.5,9.3]);</pre><pre class="example">SELECT 
(madlib.one_way_anova(level, value)).* FROM (
+    SELECT level, resistance[level] AS value
+    FROM
+        nist_anova_test, (SELECT * FROM generate_series(1,3) level) q1
+) q2;
+</pre> <pre class="result">
+  sum_squares_between | sum_squares_within | df_between | df_within | 
mean_squares_between | mean_squares_within |    statistic     |      p_value
+ 
---------------------+--------------------+------------+-----------+----------------------+---------------------+------------------+--------------------
+     27.8973333333333 |             17.452 |          2 |        12 |     
13.9486666666667 |    1.45433333333333 | 9.59110703644281 | 0.0032482226008593
+</pre><ul>
+<li><b>Kolmogorov-Smirnov test</b> (<a 
href="http://www.physics.csbsju.edu/stats/KS-test.html">Data source</a>)</li>
+</ul>
+<pre class="example">
+CREATE TABLE ks_sample_1 AS
+SELECT
+    TRUE AS first,
+    unnest(ARRAY[0.22, -0.87, -2.39, -1.79, 0.37, -1.54, 1.28, -0.31, -0.74, 
1.72, 0.38, -0.17, -0.62, -1.10, 0.30, 0.15, 2.30, 0.19, -0.50, -0.09]) AS value
+UNION ALL
+SELECT
+    FALSE,
+    unnest(ARRAY[-5.13, -2.19, -2.43, -3.83, 0.50, -3.25, 4.32, 1.63, 5.18, 
-0.43, 7.11, 4.87, -3.10, -5.81, 3.76, 6.31, 2.58, 0.07, 5.76, 
3.50]);</pre><pre class="example">SELECT (madlib.ks_test(first, value,
+    (SELECT count(value) FROM ks_sample_1 WHERE first),
+    (SELECT count(value) FROM ks_sample_1 WHERE NOT first)
+    ORDER BY value)).*
+FROM ks_sample_1;
+</pre> <pre class="result">
+  statistic |   k_statistic   |      p_value
+ -----------+-----------------+--------------------
+       0.45 | 1.4926782214936 | 0.0232132758544496
+</pre><ul>
+<li><b>Mann-Whitney test</b> (use same data as t-test)</li>
+</ul>
+<pre class="example">
+SELECT (madlib.mw_test(is_us, mpg ORDER BY mpg)).* from auto83b_two_sample;
+-- Note first parameter above is BOOLEAN
+</pre> <pre class="result">
+      statistic     | u_statistic | p_value_one_sided |  p_value_two_sided
+ -------------------+-------------+-------------------+----------------------
+  -5.50097925755249 |        32.5 | 0.999999981115618 | 3.77687645883758e-08
+</pre><ul>
+<li><b>Wilcoxon signed-rank test</b></li>
+</ul>
+<pre class="example">
+DROP TABLE IF EXISTS test_wsr;
+CREATE TABLE test_wsr (
+    x DOUBLE PRECISION,
+    y DOUBLE PRECISION
+);
+COPY test_wsr (x, y) FROM stdin DELIMITER '|';
+0.32|0.39
+0.4|0.47
+0.11|0.11
+0.47|0.43
+0.32|0.42
+0.35|0.3
+0.32|0.43
+0.63|0.98
+0.5|0.86
+0.6|0.79
+0.38|0.33
+0.46|0.45
+0.2|0.22
+0.31|0.3
+0.62|0.6
+0.52|0.53
+0.77|0.85
+0.23|0.21
+0.3|0.33
+0.7|0.57
+0.41|0.43
+0.53|0.49
+0.19|0.2
+0.31|0.35
+0.48|0.4
+\.
+
+SELECT (madlib.wsr_test(
+    x - y,
+    2 * 2^(-52) * greatest(x,y)
+    ORDER BY abs(x - y)
+)).*
+FROM test_wsr;
+</pre> <pre class="result">
+  statistic | rank_sum_pos | rank_sum_neg | num |    z_statistic    | 
p_value_one_sided | p_value_two_sided
+ 
-----------+--------------+--------------+-----+-------------------+-------------------+-------------------
+      105.5 |        105.5 |        194.5 |  24 | -1.27318365656729 | 
0.898523560667509 | 0.202952878664983
+</pre><p><a class="anchor" id="literature"></a></p><dl class="section 
user"><dt>Literature</dt><dd></dd></dl>
+<p>[1] M. Hollander, D. Wolfe: <em>Nonparametric Statistical Methods</em>, 2nd 
edition, Wiley, 1999</p>
+<p>[2] E. Lehmann, J. Romano: <em>Testing Statistical Hypotheses</em>, 3rd 
edition, Springer, 2005</p>
+<p>[3] M. Stephens: <em>Use of the Kolmogorov-Smirnov, Cramer-Von Mises and 
related statistics without extensive tables</em>, Journal of the Royal 
Statistical Society. Series B (Methodological) (1970): 115-122.</p>
+<p>[4] Wikipedia: Mann–Whitney U test calculation, <a 
href="http://en.wikipedia.org/wiki/Mann-Whitney_test#Calculations">http://en.wikipedia.org/wiki/Mann-Whitney_test#Calculations</a></p>
+<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related 
Topics</dt><dd></dd></dl>
+<p>File <a class="el" href="hypothesis__tests_8sql__in.html" title="SQL 
functions for statistical hypothesis tests. ">hypothesis_tests.sql_in</a> 
documenting the SQL functions. </p>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Tue May 16 2017 13:24:38 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/b5b51c69/docs/v1.11/group__grp__stemmer.html
----------------------------------------------------------------------
diff --git a/docs/v1.11/group__grp__stemmer.html 
b/docs/v1.11/group__grp__stemmer.html
new file mode 100644
index 0000000..ba04b63
--- /dev/null
+++ b/docs/v1.11/group__grp__stemmer.html
@@ -0,0 +1,234 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data 
mining,deep learning,ensemble methods,data science,market basket 
analysis,affinity analysis,pca,lda,regression,elastic net,huber 
white,proportional hazards,k-means,latent dirichlet allocation,bayes,support 
vector machines,svm"/>
+<title>MADlib: Stemming</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.incubator.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img 
alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ 
></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.11</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" 
href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" 
border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__stemmer.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Stemming<div class="ingroups"><a class="el" 
href="group__grp__datatrans.html">Data Types and 
Transformations</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li>
+<a href="#notes">Implementation Notes</a> </li>
+<li>
+<a href="#list">List of Stemmer Operations</a> </li>
+<li>
+<a href="#examples">Examples</a> </li>
+<li>
+<a href="#related">Related Topics</a> </li>
+</ul>
+</div><p>This module provides a basic stemming operation for text input. It is 
a support module for several machine learning algorithms that require a 
stemmer. Currently, it only supports English words.</p>
+<p>This function is a SQL interface to the implementation of the <a 
href="http://tartarus.org/~martin/PorterStemmer/">Porter Stemming 
Algorithm</a>. The original stemming algorithm is written and maintained by 
Martin Porter.</p>
+<p><a class="anchor" id="notes"></a></p><dl class="section 
user"><dt>Implementation Notes</dt><dd></dd></dl>
+<p>All functions described in this module work with text OR text array.</p>
+<p>Several of the functions require TEXT values and return NULL for a NULL 
input. See the descriptions of the individual functions for details.</p>
+<p><a class="anchor" id="list"></a></p><dl class="section user"><dt>Stemmer 
Operations</dt><dd><table class="output">
+<tr>
+<th><a class="el" 
href="porter__stemmer_8sql__in.html#aca5bc24a9a8f5c33470b9f0bf0b3c515" 
title="Returns stem of input token. Returns NULL if input token is NULL. 
">stem_token()</a></th><td><p class="starttd">Returns the stem of the token. 
Returns NULL if input is NULL.</p>
+<p class="endtd"></p>
+</td></tr>
+<tr>
+<th><a class="el" 
href="porter__stemmer_8sql__in.html#a1ac3a2fd645ddf807b36a1328134a4ea" 
title="Returns stems in an array of input token array. Returns NULL element for 
corresponding input NULL tok...">stem_token_arr()</a></th><td><p 
class="starttd">Returns the stems in an array of input token array. The stem 
would be NULL for corresponding NULL token.</p>
+<p class="endtd"><a class="anchor" id="examples"></a></p>
+</td></tr>
+</table>
+</dd></dl>
+<dl class="section user"><dt>Examples</dt><dd></dd></dl>
+<ol type="1">
+<li>Create a table with some words to be stemmed. <pre class="example">
+CREATE TABLE token_tbl ( id integer,
+                         word text
+                       );
+INSERT INTO token_tbl VALUES
+ (1, 'kneel'),
+ (2, 'kneeled'),
+ (3, 'kneeling'),
+ (4, 'kneels'),
+ (5, 'knees'),
+ (6, 'knell'),
+ (7, 'knelt'),
+ (8, 'knew'),
+ (9, 'knick'),
+ (10, 'knif'),
+ (11, 'knife'),
+ (12, 'knight'),
+ (13, 'knightly'),
+ (14, 'knights'),
+ (15, 'knit'),
+ (16, 'knits'),
+ (17, 'knitted'),
+ (18, 'knitting'),
+ (19, 'knives'),
+ (20, 'knob'),
+ (21, 'knobs'),
+ (22, 'knock'),
+ (23, 'knocked'),
+ (24, 'knocker'),
+ (25, 'knockers'),
+ (26, 'knocking'),
+ (27, 'knocks'),
+ (28, 'knopp'),
+ (29, 'knot'),
+ (30, 'knots');
+</pre></li>
+<li>Return the stem words <pre class="example">
+SELECT madlib.stem_token(word) FROM token_tbl;
+</pre> <pre class="result">
+ stem_token
+&#160;------------
+ kneel
+ kneel
+ kneel
+ kneel
+ knee
+ knell
+ knelt
+ knew
+ knick
+ knif
+ knife
+ knight
+ knight
+ knight
+ knit
+ knit
+ knit
+ knit
+ knive
+ knob
+ knob
+ knock
+ knock
+ knocker
+ knocker
+ knock
+ knock
+ knopp
+ knot
+ knot
+(30 rows)
+</pre></li>
+<li>The input can be processed as an array <pre class="example">
+SELECT madlib.stem_token_arr(array_agg(word order by word)) FROM token_tbl;
+</pre> <pre class="result">
+  stem_token_arr
+&#160;-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 
{kneel,kneel,kneel,kneel,knee,knell,knelt,knew,knick,knif,knife,knight,knight,knight,knit,knit,knit,knit,knive,knob,knob,knock,knock,knocker,knocker,knock,knock,knopp,knot,knot}
+(1 row)
+</pre></li>
+</ol>
+<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related 
Topics</dt><dd></dd></dl>
+<p>File <a class="el" href="porter__stemmer_8sql__in.html" 
title="implementation of porter stemmer operations in SQL 
">porter_stemmer.sql_in</a> for list of functions and usage. </p>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Tue May 16 2017 13:24:38 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/b5b51c69/docs/v1.11/group__grp__summary.html
----------------------------------------------------------------------
diff --git a/docs/v1.11/group__grp__summary.html 
b/docs/v1.11/group__grp__summary.html
new file mode 100644
index 0000000..efa19ba
--- /dev/null
+++ b/docs/v1.11/group__grp__summary.html
@@ -0,0 +1,308 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data 
mining,deep learning,ensemble methods,data science,market basket 
analysis,affinity analysis,pca,lda,regression,elastic net,huber 
white,proportional hazards,k-means,latent dirichlet allocation,bayes,support 
vector machines,svm"/>
+<title>MADlib: Summary</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.incubator.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img 
alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ 
></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.11</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" 
href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" 
border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__summary.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Summary<div class="ingroups"><a class="el" 
href="group__grp__stats.html">Statistics</a> &raquo; <a class="el" 
href="group__grp__desc__stats.html">Descriptive Statistics</a></div></div>  
</div>
+</div><!--header-->
+<div class="contents">
+<div class="toc"><b>Contents</b> <ul>
+<li>
+<a href="#usage">Summary Function Syntax</a> </li>
+<li>
+<a href="#examples">Examples</a> </li>
+<li>
+<a href="#notes">Notes</a> </li>
+<li>
+<a href="#related">Related Topics</a> </li>
+</ul>
+</div><p>The MADlib <b><a class="el" 
href="summary_8sql__in.html#a14ff66aaa0248c5e2741dfaf2e360881">summary()</a></b>
 function produces summary statistics for any data table. The function invokes 
various methods from the MADlib library to provide the data overview.</p>
+<p><a class="anchor" id="usage"></a></p><dl class="section user"><dt>Summary 
Function Syntax</dt><dd>The <b><a class="el" 
href="summary_8sql__in.html#a14ff66aaa0248c5e2741dfaf2e360881">summary()</a></b>
 function has the following syntax:</dd></dl>
+<pre class="syntax">
+summary ( source_table,
+          output_table,
+          target_cols,
+          grouping_cols,
+          get_distinct,
+          get_quartiles,
+          ntile_array,
+          how_many_mfv,
+          get_estimates
+        )
+</pre><p> The <b><a class="el" 
href="summary_8sql__in.html#a14ff66aaa0248c5e2741dfaf2e360881">summary()</a></b>
 function returns a composite type containing three fields: </p><table 
class="output">
+<tr>
+<th>output_table </th><td>TEXT. The name of the output table.  </td></tr>
+<tr>
+<th>row_count </th><td>INTEGER. The number of rows in the output table.  
</td></tr>
+<tr>
+<th>duration </th><td>FLOAT8. The time taken (in seconds) to compute the 
summary.  </td></tr>
+</table>
+<p><b>Arguments</b> </p><dl class="arglist">
+<dt>source_table </dt>
+<dd>TEXT. The name of the table containing the input data. </dd>
+<dt>output_table </dt>
+<dd><p class="startdd">TEXT. The name of the table to contain the output 
summary data.</p>
+<p class="enddd">Summary statistics are saved in a table with the name 
specified in the <em>output_table</em> argument. The table contains the 
following columns: </p><table class="output">
+<tr>
+<th>group_by </th><td>Group-by column name. NULL if none provided.  </td></tr>
+<tr>
+<th>group_by_value </th><td>Value of the Group-by column. NULL if there is no 
grouping.  </td></tr>
+<tr>
+<th>target_column </th><td>Targeted column values for which summary is 
requested.  </td></tr>
+<tr>
+<th>column_number </th><td>Physical column number for the target column, as 
described in <em>pg_attribute</em>  catalog.  </td></tr>
+<tr>
+<th>data_type </th><td>Data type of the target column. Standard GPDB type 
descriptors are displayed.  </td></tr>
+<tr>
+<th>row_count </th><td>Number of rows for the target column.  </td></tr>
+<tr>
+<th>distinct_values </th><td>Number of distinct values in the target column. 
When the <a class="el" 
href="summary_8sql__in.html#a14ff66aaa0248c5e2741dfaf2e360881">summary()</a> 
function is called with the <em>get_estimates</em> argument set to TRUE, this 
is an estimated statistic based on the Flajolet-Martin distinct count 
estimator.  </td></tr>
+<tr>
+<th>missing_values </th><td>Number of missing values in the target column.  
</td></tr>
+<tr>
+<th>blank_values </th><td>Number of blank values. Blanks are defined by this 
regular expression:<pre class="fragment">'^\w*$'</pre>  </td></tr>
+<tr>
+<th>fraction_missing </th><td>Fraction of total rows that are missing, as a 
decimal value, e.g. 0.3.  </td></tr>
+<tr>
+<th>fraction_blank </th><td>Fraction of total rows that are blank, as a 
decimal value, e.g. 0.3.  </td></tr>
+<tr>
+<th>mean </th><td>Mean value of target column if target is numeric, otherwise 
NULL.  </td></tr>
+<tr>
+<th>variance </th><td>Variance of target column if target is numeric, 
otherwise NULL.  </td></tr>
+<tr>
+<th>min </th><td>Minimum value of target column. For strings this is the 
length of the shortest string.  </td></tr>
+<tr>
+<th>max </th><td>Maximum value of target column. For strings this is the 
length of the longest string.  </td></tr>
+<tr>
+<th>first_quartile </th><td>First quartile (25th percentile), only for numeric 
columns. <b>Currently unavailable for PostgreSQL 9.3 or lower</b>.  </td></tr>
+<tr>
+<th>median </th><td>Median value of target column, if target is numeric, 
otherwise NULL. <b>Currently unavailable for PostgreSQL 9.3 or lower</b>.  
</td></tr>
+<tr>
+<th>third_quartile </th><td>Third quartile (75th percentile), only for numeric 
columns. <b>Currently unavailable for PostgreSQL 9.3 or lower</b>.  </td></tr>
+<tr>
+<th>quantile_array </th><td>Percentile values corresponding to 
<em>ntile_array</em>. <b>Currently unavailable for PostgreSQL 9.3 or lower</b>. 
 </td></tr>
+<tr>
+<th>most_frequent_values </th><td>An array containing the most frequently 
occurring values. The <em>how_many_mfv</em> argument determines the length of 
the array, 10 by default. If the <a class="el" 
href="summary_8sql__in.html#a14ff66aaa0248c5e2741dfaf2e360881">summary()</a> 
function is called with the <em>get_estimates</em> argument set to TRUE 
(default), the frequent values computation is performed using a parallel 
aggregation method that is faster, but in some cases can fail to detect the 
exact most frequent values.  </td></tr>
+<tr>
+<th>mfv_frequencies </th><td>Array containing the frequency count for each of 
the most frequent values.   </td></tr>
+</table>
+</dd>
+<dt>target_cols (optional) </dt>
+<dd>TEXT, default NULL. A comma-separated list of columns to summarize. If 
NULL, summaries are produced for all columns. </dd>
+<dt>grouping_cols (optional) </dt>
+<dd>TEXT, default NULL. A comma-separated list of columns on which to group 
results. If NULL, summaries are produced on the complete table. <dl 
class="section note"><dt>Note</dt><dd>Please note that summary statistics are 
calculated for each grouping column independently. That is, grouping columns 
are not combined together as in the regular PostgreSQL style GROUP BY 
directive. (This was done to reduce long run time and huge output table size 
which would otherwise result in the case of large input tables with a lot of 
grouping_cols and target_cols specified.) </dd></dl>
+</dd>
+<dt>get_distinct (optional) </dt>
+<dd>BOOLEAN, default TRUE. If true, distinct values are counted. </dd>
+<dt>get_quartiles (optional) </dt>
+<dd>BOOLEAN, default TRUE. If TRUE, quartiles are computed. </dd>
+<dt>ntile_array (optional) </dt>
+<dd>FLOAT8[], default NULL. An array of quantile values to compute. If NULL, 
quantile values are not computed. <dl class="section 
note"><dt>Note</dt><dd>Quartile and quantile functions are not available for 
PostgreSQL 9.3 or lower. If you are using PostgreSQL 9.3 or lower, the output 
table will not contain these values, even if you set 'get_quartiles' = TRUE or 
provide an array of quantile values for the parameter 'ntile_array'. </dd></dl>
+</dd>
+<dt>how_many_mfv (optional) </dt>
+<dd>INTEGER, default: 10. The number of most-frequent-values to compute. </dd>
+<dt>get_estimates (optional) </dt>
+<dd>BOOLEAN, default TRUE. If TRUE, estimated values are produced for distinct 
values and most frequent values. If FALSE, exact values are calculated (may 
take longer to run depending on data size). </dd>
+</dl>
+<p><a class="anchor" id="examples"></a></p><dl class="section 
user"><dt>Examples</dt><dd></dd></dl>
+<ol type="1">
+<li>View online help for the <a class="el" 
href="summary_8sql__in.html#a14ff66aaa0248c5e2741dfaf2e360881">summary()</a> 
function. <pre class="example">
+SELECT * FROM madlib.summary();
+</pre></li>
+<li>Create an input data set. <pre class="example">
+CREATE TABLE houses (id INT, tax INT, bedroom INT, bath FLOAT, price INT,
+             size INT, lot INT);
+COPY houses FROM STDIN WITH DELIMITER '|';
+  1 |  590 |       2 |    1 |  50000 |  770 | 22100
+  2 | 1050 |       3 |    2 |  85000 | 1410 | 12000
+  3 |   20 |       3 |    1 |  22500 | 1060 |  3500
+  4 |  870 |       2 |    2 |  90000 | 1300 | 17500
+  5 | 1320 |       3 |    2 | 133000 | 1500 | 30000
+  6 | 1350 |       2 |    1 |  90500 |  820 | 25700
+  7 | 2790 |       3 |  2.5 | 260000 | 2130 | 25000
+  8 |  680 |       2 |    1 | 142500 | 1170 | 22000
+  9 | 1840 |       3 |    2 | 160000 | 1500 | 19000
+ 10 | 3680 |       4 |    2 | 240000 | 2790 | 20000
+ 11 | 1660 |       3 |    1 |  87000 | 1030 | 17500
+ 12 | 1620 |       3 |    2 | 118600 | 1250 | 20000
+ 13 | 3100 |       3 |    2 | 140000 | 1760 | 38000
+ 14 | 2070 |       2 |    3 | 148000 | 1550 | 14000
+ 15 |  650 |       3 |  1.5 |  65000 | 1450 | 12000
+\.
+</pre></li>
+<li>Run the <b><a class="el" 
href="summary_8sql__in.html#a14ff66aaa0248c5e2741dfaf2e360881">summary()</a></b>
 function. <pre class="example">
+SELECT * FROM madlib.summary( 'houses',
+                              'houses_summary',
+                              'tax,bedroom,lot,bath,price,size,lot',
+                              'bedroom',
+                              TRUE,
+                              TRUE,
+                              NULL,
+                              5,
+                              FALSE
+                            );
+</pre> Result: <pre class="result">
+  output_table  | row_count |    duration
+----------------+-----------+----------------
+ houses_summary |        21 | 0.207587003708
+(1 row)
+</pre></li>
+<li>View the summary data. <pre class="example">
+-- Turn on expanded display for readability.
+\x on
+SELECT * FROM houses_summary;
+</pre> Result: <pre class="result">
+&#160;-[ RECORD 1 ]--------+-----------------------------------
+ group_by             | bedroom
+ group_by_value       | 3
+ target_column        | tax
+ column_number        | 2
+ data_type            | int4
+ row_count            | 9
+ distinct_values      | 9
+ missing_values       | 0
+ blank_values         |
+ fraction_missing     | 0
+ fraction_blank       |
+ mean                 | 1561.11111111111
+ variance             | 936736.111111111
+ min                  | 20
+ max                  | 3100
+ most_frequent_values | {20,1320,2790,1840,1660}
+ mfv_frequencies      | {1,1,1,1,1}
+&#160;-[ RECORD 2 ]--------+-----------------------------------
+ group_by             | bedroom
+ group_by_value       | 3
+ target_column        | bath
+ column_number        | 4
+ ...
+</pre></li>
+</ol>
+<p><a class="anchor" id="notes"></a></p><dl class="section 
user"><dt>Notes</dt><dd><ul>
+<li>Table names can be optionally schema qualified (current_schemas() would be 
searched if a schema name is not provided) and table and column names should 
follow case-sensitivity and quoting rules per the database. (For instance, 
'mytable' and 'MyTable' both resolve to the same entity, i.e. 'mytable'. If 
mixed-case or multi-byte characters are desired for entity names then the 
string should be double-quoted; in this case the input would be 
'"MyTable"').</li>
+<li>Estimated values are only implemented for the distinct values 
computation.</li>
+<li>The <em>get_estimates</em> parameter controls computation for two 
statistics:<ul>
+<li>If <em>get_estimates</em> is TRUE then the distinct value computation is 
estimated. Further, the most frequent values computation is computed using a 
"quick and dirty" method that does parallel aggregation in Greenplum Database 
at the expense of missing some of the most frequent values.</li>
+<li>If <em>get_estimates</em> is FALSE then the distinct values are computed 
in a slow but exact method. The most frequent values are computed using a 
faithful implementation that preserves the approximation guarantees of the 
Cormode/Muthukrishnan method (more information in <a class="el" 
href="group__grp__mfvsketch.html">MFV (Most Frequent Values)</a>).</li>
+</ul>
+</li>
+<li>Summary statistics are calculated for each grouping column independently. 
That is, grouping columns are not combined together as in the regular 
PostgreSQL style GROUP BY directive. (This was done to reduce long run time and 
huge output table size which would otherwise result in the case of large input 
tables with a lot of grouping_cols and target_cols specified.)</li>
+<li>Quartile and quantile functions are not available for PostgreSQL 9.3 or 
lower. If you are using PostgreSQL 9.3 or lower, the output table will not 
contain these values, even if you set 'get_quartiles' = TRUE or provide an 
array of quantile values for the parameter 'ntile_array'.</li>
+</ul>
+</dd></dl>
+<p><a class="anchor" id="related"></a></p><dl class="section user"><dt>Related 
Topics</dt><dd>File <a class="el" href="summary_8sql__in.html" title="Summary 
function for descriptive statistics. ">summary.sql_in</a> documenting the <b><a 
class="el" 
href="summary_8sql__in.html#a14ff66aaa0248c5e2741dfaf2e360881">summary()</a></b>
 function</dd></dl>
+<p><a class="el" href="group__grp__mfvsketch.html">MFV (Most Frequent 
Values)</a> </p>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Tue May 16 2017 13:24:38 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/b5b51c69/docs/v1.11/group__grp__super.html
----------------------------------------------------------------------
diff --git a/docs/v1.11/group__grp__super.html 
b/docs/v1.11/group__grp__super.html
new file mode 100644
index 0000000..b028f3b
--- /dev/null
+++ b/docs/v1.11/group__grp__super.html
@@ -0,0 +1,140 @@
+<!-- HTML header for doxygen 1.8.4-->
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="keywords" content="madlib,postgres,greenplum,machine learning,data 
mining,deep learning,ensemble methods,data science,market basket 
analysis,affinity analysis,pca,lda,regression,elastic net,huber 
white,proportional hazards,k-means,latent dirichlet allocation,bayes,support 
vector machines,svm"/>
+<title>MADlib: Supervised Learning</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="navtree.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="resize.js"></script>
+<script type="text/javascript" src="navtreedata.js"></script>
+<script type="text/javascript" src="navtree.js"></script>
+<script type="text/javascript">
+  $(document).ready(initResizable);
+</script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { init_search(); });
+</script>
+<!-- hack in the navigation tree -->
+<script type="text/javascript" src="eigen_navtree_hacks.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+<link href="madlib_extra.css" rel="stylesheet" type="text/css"/>
+<!-- google analytics -->
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-45382226-1', 'madlib.incubator.apache.org');
+  ga('send', 'pageview');
+</script>
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectlogo"><a href="http://madlib.incubator.apache.org"><img 
alt="Logo" src="madlib.png" height="50" style="padding-left:0.5em;" border="0"/ 
></a></td>
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">
+   <span id="projectnumber">1.11</span>
+   </div>
+   <div id="projectbrief">User Documentation for MADlib</div>
+  </td>
+   <td>        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" 
href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" 
border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+</div><!-- top -->
+<div id="side-nav" class="ui-resizable side-nav-resizable">
+  <div id="nav-tree">
+    <div id="nav-tree-contents">
+      <div id="nav-sync" class="sync"></div>
+    </div>
+  </div>
+  <div id="splitbar" style="-moz-user-select:none;" 
+       class="ui-resizable-handle">
+  </div>
+</div>
+<script type="text/javascript">
+$(document).ready(function(){initNavTree('group__grp__super.html','');});
+</script>
+<div id="doc-content">
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="summary">
+<a href="#groups">Modules</a>  </div>
+  <div class="headertitle">
+<div class="title">Supervised Learning</div>  </div>
+</div><!--header-->
+<div class="contents">
+<a name="details" id="details"></a><h2 class="groupheader">Detailed 
Description</h2>
+<p>Contains methods which perform supervised learning tasks </p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a 
name="groups"></a>
+Modules</h2></td></tr>
+<tr class="memitem:group__grp__crf"><td class="memItemLeft" align="right" 
valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" 
href="group__grp__crf.html">Conditional Random Field</a></td></tr>
+<tr class="memdesc:group__grp__crf"><td class="mdescLeft">&#160;</td><td 
class="mdescRight">Constructs a Conditional Random Fields (CRF) model for 
labeling sequential data. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__regml"><td class="memItemLeft" align="right" 
valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" 
href="group__grp__regml.html">Regression Models</a></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__svm"><td class="memItemLeft" align="right" 
valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" 
href="group__grp__svm.html">Support Vector Machines</a></td></tr>
+<tr class="memdesc:group__grp__svm"><td class="mdescLeft">&#160;</td><td 
class="mdescRight">Solves classification and regression problems by separating 
data with a hyperplane or other nonlinear decision boundary. <br /></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:group__grp__tree"><td class="memItemLeft" align="right" 
valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" 
href="group__grp__tree.html">Tree Methods</a></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+</div><!-- contents -->
+</div><!-- doc-content -->
+<!-- start footer part -->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    <li class="footer">Generated on Tue May 16 2017 13:24:38 for MADlib by
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.13 </li>
+  </ul>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/b5b51c69/docs/v1.11/group__grp__super.js
----------------------------------------------------------------------
diff --git a/docs/v1.11/group__grp__super.js b/docs/v1.11/group__grp__super.js
new file mode 100644
index 0000000..a83833b
--- /dev/null
+++ b/docs/v1.11/group__grp__super.js
@@ -0,0 +1,7 @@
+var group__grp__super =
+[
+    [ "Conditional Random Field", "group__grp__crf.html", null ],
+    [ "Regression Models", "group__grp__regml.html", "group__grp__regml" ],
+    [ "Support Vector Machines", "group__grp__svm.html", null ],
+    [ "Tree Methods", "group__grp__tree.html", "group__grp__tree" ]
+];
\ No newline at end of file

[16/51] [partial] incubator-madlib-site git commit: Add v1.11 docs

Reply via email to