Repository: incubator-madlib-site
Updated Branches:
  refs/heads/asf-site d7cb76fa4 -> eaa6ce9d3


update K-means notebook with new array unnest function


Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/commit/eaa6ce9d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/tree/eaa6ce9d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/diff/eaa6ce9d

Branch: refs/heads/asf-site
Commit: eaa6ce9d316d985bb3fc2ec342c1e0c1c69cf552
Parents: d7cb76f
Author: Frank McQuillan <fmcquil...@pivotal.io>
Authored: Thu Apr 27 15:20:36 2017 -0700
Committer: Frank McQuillan <fmcquil...@pivotal.io>
Committed: Thu Apr 27 15:20:36 2017 -0700

----------------------------------------------------------------------
 community-artifacts/Kmeans-v1.ipynb |  977 --------------------------
 community-artifacts/Kmeans-v2.ipynb | 1102 ++++++++++++++++++++++++++++++
 2 files changed, 1102 insertions(+), 977 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/eaa6ce9d/community-artifacts/Kmeans-v1.ipynb
----------------------------------------------------------------------
diff --git a/community-artifacts/Kmeans-v1.ipynb 
b/community-artifacts/Kmeans-v1.ipynb
deleted file mode 100644
index 1706667..0000000
--- a/community-artifacts/Kmeans-v1.ipynb
+++ /dev/null
@@ -1,977 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# K-means (MADlib v1.10+)\n",
-    "Demonstrates k-means including new array input in MADlib v1.10"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 51,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The sql extension is already loaded. To reload it, use:\n",
-      "  %reload_ext sql\n"
-     ]
-    }
-   ],
-   "source": [
-    "%load_ext sql"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 52,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "u'Connected: gpdbchina@madlib'"
-      ]
-     },
-     "execution_count": 52,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%sql postgresql://gpdbchina@10.194.10.68:55000/madlib\n",
-    "#%sql postgresql://fmcquillan@localhost:5432/madlib\n",
-    "#%sql postgresql://gpadmin@54.197.30.46:10432/gpadmin"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 53,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>version</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>MADlib version: 1.10.0-dev, git revision: 
rel/v1.9.1-47-g2d5a5ed, cmake configuration time: Tue Feb  7 19:45:19 UTC 2017, 
build type: Release, build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C 
compiler: gcc 4.4.0, C++ compiler: g++ 4.4.0</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(u'MADlib version: 1.10.0-dev, git revision: rel/v1.9.1-47-g2d5a5ed, 
cmake configuration time: Tue Feb  7 19:45:19 UTC 2017, build type: Release, 
build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, 
C++ compiler: g++ 4.4.0',)]"
-      ]
-     },
-     "execution_count": 53,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%sql select madlib.version();\n",
-    "#%sql select version();"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 1. Prepare some input data:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 59,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "Done.\n",
-      "10 rows affected.\n",
-      "10 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>pid</th>\n",
-       "        <th>points</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>[14.23, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 
5.64, 1.04, 3.92, 1065.0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>[13.2, 1.78, 2.14, 11.2, 1.0, 2.65, 2.76, 0.26, 1.28, 
4.38, 1.05, 3.49, 1050.0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>[13.16, 2.36, 2.67, 18.6, 101.0, 2.8, 3.24, 0.3, 2.81, 
5.6799, 1.03, 3.17, 1185.0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>4</td>\n",
-       "        <td>[14.37, 1.95, 2.5, 16.8, 113.0, 3.85, 3.49, 0.24, 2.18, 
7.8, 0.86, 3.45, 1480.0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>[13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 
4.32, 1.04, 2.93, 735.0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>[14.2, 1.76, 2.45, 15.2, 112.0, 3.27, 3.39, 0.34, 1.97, 
6.75, 1.05, 2.85, 1450.0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>7</td>\n",
-       "        <td>[14.39, 1.87, 2.45, 14.6, 96.0, 2.5, 2.52, 0.3, 1.98, 
5.25, 1.02, 3.58, 1290.0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>8</td>\n",
-       "        <td>[14.06, 2.15, 2.61, 17.6, 121.0, 2.6, 2.51, 0.31, 1.25, 
5.05, 1.06, 3.58, 1295.0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>[14.83, 1.64, 2.17, 14.0, 97.0, 2.8, 2.98, 0.29, 1.98, 
5.2, 1.08, 2.85, 1045.0]</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>[13.86, 1.35, 2.27, 16.0, 98.0, 2.98, 3.15, 0.22, 1.85, 
7.2199, 1.01, 3.55, 1045.0]</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, [14.23, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 5.64, 
1.04, 3.92, 1065.0]),\n",
-       " (2, [13.2, 1.78, 2.14, 11.2, 1.0, 2.65, 2.76, 0.26, 1.28, 4.38, 1.05, 
3.49, 1050.0]),\n",
-       " (3, [13.16, 2.36, 2.67, 18.6, 101.0, 2.8, 3.24, 0.3, 2.81, 5.6799, 
1.03, 3.17, 1185.0]),\n",
-       " (4, [14.37, 1.95, 2.5, 16.8, 113.0, 3.85, 3.49, 0.24, 2.18, 7.8, 
0.86, 3.45, 1480.0]),\n",
-       " (5, [13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 4.32, 
1.04, 2.93, 735.0]),\n",
-       " (6, [14.2, 1.76, 2.45, 15.2, 112.0, 3.27, 3.39, 0.34, 1.97, 6.75, 
1.05, 2.85, 1450.0]),\n",
-       " (7, [14.39, 1.87, 2.45, 14.6, 96.0, 2.5, 2.52, 0.3, 1.98, 5.25, 1.02, 
3.58, 1290.0]),\n",
-       " (8, [14.06, 2.15, 2.61, 17.6, 121.0, 2.6, 2.51, 0.31, 1.25, 5.05, 
1.06, 3.58, 1295.0]),\n",
-       " (9, [14.83, 1.64, 2.17, 14.0, 97.0, 2.8, 2.98, 0.29, 1.98, 5.2, 1.08, 
2.85, 1045.0]),\n",
-       " (10, [13.86, 1.35, 2.27, 16.0, 98.0, 2.98, 3.15, 0.22, 1.85, 7.2199, 
1.01, 3.55, 1045.0])]"
-      ]
-     },
-     "execution_count": 59,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS km_sample;\n",
-    "\n",
-    "CREATE TABLE km_sample(pid int, points double precision[]);\n",
-    "\n",
-    "INSERT INTO km_sample VALUES\n",
-    "(1,  '{14.23, 1.71, 2.43, 15.6, 127, 2.8, 3.0600, 0.2800, 2.29, 5.64, 
1.04, 3.92, 1065}'),\n",
-    "(2,  '{13.2, 1.78, 2.14, 11.2, 1, 2.65, 2.76, 0.26, 1.28, 4.38, 1.05, 
3.49, 1050}'),\n",
-    "(3,  '{13.16, 2.36,  2.67, 18.6, 101, 2.8,  3.24, 0.3, 2.81, 5.6799, 
1.03, 3.17, 1185}'),\n",
-    "(4,  '{14.37, 1.95, 2.5, 16.8, 113, 3.85, 3.49, 0.24, 2.18, 7.8, 0.86, 
3.45, 1480}'),\n",
-    "(5,  '{13.24, 2.59, 2.87, 21, 118, 2.8, 2.69, 0.39, 1.82, 4.32, 1.04, 
2.93, 735}'),\n",
-    "(6,  '{14.2, 1.76, 2.45, 15.2, 112, 3.27, 3.39, 0.34, 1.97, 6.75, 1.05, 
2.85, 1450}'),\n",
-    "(7,  '{14.39, 1.87, 2.45, 14.6, 96, 2.5, 2.52, 0.3, 1.98, 5.25, 1.02, 
3.58, 1290}'),\n",
-    "(8,  '{14.06, 2.15, 2.61, 17.6, 121, 2.6, 2.51, 0.31, 1.25, 5.05, 1.06, 
3.58, 1295}'),\n",
-    "(9,  '{14.83, 1.64, 2.17, 14, 97, 2.8, 2.98, 0.29, 1.98, 5.2, 1.08, 2.85, 
1045}'),\n",
-    "(10, '{13.86, 1.35, 2.27, 16, 98, 2.98, 3.15, 0.22, 1.8500, 7.2199, 1.01, 
3.55, 1045}');\n",
-    "\n",
-    "SELECT * FROM km_sample ORDER BY pid;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 2. Run k-means clustering using kmeans++ with centroid seeding:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 60,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>centroids</th>\n",
-       "        <th>cluster_variance</th>\n",
-       "        <th>objective_fn</th>\n",
-       "        <th>frac_reassigned</th>\n",
-       "        <th>num_iterations</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>[[14.036, 2.018, 2.536, 16.56, 108.6, 3.004, 3.03, 0.298, 
2.038, 6.10598, 1.004, 3.326, 1340.0], [13.872, 1.814, 2.376, 15.56, 88.2, 
2.806, 2.928, 0.288, 1.844, 5.35198, 1.044, 3.348, 988.0]]</td>\n",
-       "        <td>[60672.638245208, 90512.324426408]</td>\n",
-       "        <td>151184.962672</td>\n",
-       "        <td>0.0</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[([[14.036, 2.018, 2.536, 16.56, 108.6, 3.004, 3.03, 0.298, 2.038, 
6.10598, 1.004, 3.326, 1340.0], [13.872, 1.814, 2.376, 15.56, 88.2, 2.806, 
2.928, 0.288, 1.844, 5.35198, 1.044, 3.348, 988.0]], [60672.638245208, 
90512.324426408], 151184.962671616, 0.0, 2)]"
-      ]
-     },
-     "execution_count": 60,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "SELECT * FROM madlib.kmeanspp( 'km_sample',   -- Table of source data\n",
-    "                               'points',      -- Column containing point 
co-ordinates \n",
-    "                               2,             -- Number of centroids to 
calculate\n",
-    "                               'madlib.squared_dist_norm2',   -- Distance 
function\n",
-    "                               'madlib.avg',  -- Aggregate function\n",
-    "                               20,            -- Number of iterations\n",
-    "                               0.001          -- Fraction of centroids 
reassigned to keep iterating \n",
-    "                             );"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 3. Calculate the simplified silhouette coefficient:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 62,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>simple_silhouette</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>0.707360426139</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(0.707360426138584,)]"
-      ]
-     },
-     "execution_count": 62,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "SELECT * FROM madlib.simple_silhouette( 'km_sample',\n",
-    "                                        'points',\n",
-    "                                        (SELECT centroids FROM\n",
-    "                                            
madlib.kmeanspp('km_sample',\n",
-    "                                                            'points',\n",
-    "                                                            2,\n",
-    "                                                            
'madlib.squared_dist_norm2',\n",
-    "                                                            
'madlib.avg',\n",
-    "                                                            20,\n",
-    "                                                            0.001)),\n",
-    "                                        'madlib.dist_norm2'\n",
-    "                                      );"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 4. Find the cluster assignment for each point:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 63,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "1 rows affected.\n",
-      "10 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>pid</th>\n",
-       "        <th>points</th>\n",
-       "        <th>cluster_id</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>[14.23, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 
5.64, 1.04, 3.92, 1065.0]</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>[13.2, 1.78, 2.14, 11.2, 1.0, 2.65, 2.76, 0.26, 1.28, 
4.38, 1.05, 3.49, 1050.0]</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>[13.16, 2.36, 2.67, 18.6, 101.0, 2.8, 3.24, 0.3, 2.81, 
5.6799, 1.03, 3.17, 1185.0]</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>4</td>\n",
-       "        <td>[14.37, 1.95, 2.5, 16.8, 113.0, 3.85, 3.49, 0.24, 2.18, 
7.8, 0.86, 3.45, 1480.0]</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>[13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 
4.32, 1.04, 2.93, 735.0]</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>[14.2, 1.76, 2.45, 15.2, 112.0, 3.27, 3.39, 0.34, 1.97, 
6.75, 1.05, 2.85, 1450.0]</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>7</td>\n",
-       "        <td>[14.39, 1.87, 2.45, 14.6, 96.0, 2.5, 2.52, 0.3, 1.98, 
5.25, 1.02, 3.58, 1290.0]</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>8</td>\n",
-       "        <td>[14.06, 2.15, 2.61, 17.6, 121.0, 2.6, 2.51, 0.31, 1.25, 
5.05, 1.06, 3.58, 1295.0]</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>[14.83, 1.64, 2.17, 14.0, 97.0, 2.8, 2.98, 0.29, 1.98, 
5.2, 1.08, 2.85, 1045.0]</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>[13.86, 1.35, 2.27, 16.0, 98.0, 2.98, 3.15, 0.22, 1.85, 
7.2199, 1.01, 3.55, 1045.0]</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, [14.23, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 5.64, 
1.04, 3.92, 1065.0], 0),\n",
-       " (2, [13.2, 1.78, 2.14, 11.2, 1.0, 2.65, 2.76, 0.26, 1.28, 4.38, 1.05, 
3.49, 1050.0], 0),\n",
-       " (3, [13.16, 2.36, 2.67, 18.6, 101.0, 2.8, 3.24, 0.3, 2.81, 5.6799, 
1.03, 3.17, 1185.0], 1),\n",
-       " (4, [14.37, 1.95, 2.5, 16.8, 113.0, 3.85, 3.49, 0.24, 2.18, 7.8, 
0.86, 3.45, 1480.0], 1),\n",
-       " (5, [13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 4.32, 
1.04, 2.93, 735.0], 0),\n",
-       " (6, [14.2, 1.76, 2.45, 15.2, 112.0, 3.27, 3.39, 0.34, 1.97, 6.75, 
1.05, 2.85, 1450.0], 1),\n",
-       " (7, [14.39, 1.87, 2.45, 14.6, 96.0, 2.5, 2.52, 0.3, 1.98, 5.25, 1.02, 
3.58, 1290.0], 1),\n",
-       " (8, [14.06, 2.15, 2.61, 17.6, 121.0, 2.6, 2.51, 0.31, 1.25, 5.05, 
1.06, 3.58, 1295.0], 1),\n",
-       " (9, [14.83, 1.64, 2.17, 14.0, 97.0, 2.8, 2.98, 0.29, 1.98, 5.2, 1.08, 
2.85, 1045.0], 0),\n",
-       " (10, [13.86, 1.35, 2.27, 16.0, 98.0, 2.98, 3.15, 0.22, 1.85, 7.2199, 
1.01, 3.55, 1045.0], 0)]"
-      ]
-     },
-     "execution_count": 63,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS km_result;\n",
-    "\n",
-    "-- Run kmeans algorithm\n",
-    "CREATE TABLE km_result AS\n",
-    "SELECT * FROM madlib.kmeanspp('km_sample', 'points', 2,\n",
-    "                           'madlib.squared_dist_norm2',\n",
-    "                           'madlib.avg', 20, 0.001); \n",
-    "\n",
-    "-- Get point assignment\n",
-    "SELECT data.*,  (madlib.closest_column(centroids, points)).column_id as 
cluster_id\n",
-    "FROM km_sample as data, km_result\n",
-    "ORDER BY data.pid;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 5.0 Array input"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 64,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "Done.\n",
-      "10 rows affected.\n",
-      "10 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>pid</th>\n",
-       "        <th>p1</th>\n",
-       "        <th>p2</th>\n",
-       "        <th>p3</th>\n",
-       "        <th>p4</th>\n",
-       "        <th>p5</th>\n",
-       "        <th>p6</th>\n",
-       "        <th>p7</th>\n",
-       "        <th>p8</th>\n",
-       "        <th>p9</th>\n",
-       "        <th>p10</th>\n",
-       "        <th>p11</th>\n",
-       "        <th>p12</th>\n",
-       "        <th>p13</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>14.23</td>\n",
-       "        <td>1.71</td>\n",
-       "        <td>2.43</td>\n",
-       "        <td>15.6</td>\n",
-       "        <td>127.0</td>\n",
-       "        <td>2.8</td>\n",
-       "        <td>3.06</td>\n",
-       "        <td>0.28</td>\n",
-       "        <td>2.29</td>\n",
-       "        <td>5.64</td>\n",
-       "        <td>1.04</td>\n",
-       "        <td>3.92</td>\n",
-       "        <td>1065.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>13.2</td>\n",
-       "        <td>1.78</td>\n",
-       "        <td>2.14</td>\n",
-       "        <td>11.2</td>\n",
-       "        <td>1.0</td>\n",
-       "        <td>2.65</td>\n",
-       "        <td>2.76</td>\n",
-       "        <td>0.26</td>\n",
-       "        <td>1.28</td>\n",
-       "        <td>4.38</td>\n",
-       "        <td>1.05</td>\n",
-       "        <td>3.49</td>\n",
-       "        <td>1050.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>13.16</td>\n",
-       "        <td>2.36</td>\n",
-       "        <td>2.67</td>\n",
-       "        <td>18.6</td>\n",
-       "        <td>101.0</td>\n",
-       "        <td>2.8</td>\n",
-       "        <td>3.24</td>\n",
-       "        <td>0.3</td>\n",
-       "        <td>2.81</td>\n",
-       "        <td>5.6799</td>\n",
-       "        <td>1.03</td>\n",
-       "        <td>3.17</td>\n",
-       "        <td>1185.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>4</td>\n",
-       "        <td>14.37</td>\n",
-       "        <td>1.95</td>\n",
-       "        <td>2.5</td>\n",
-       "        <td>16.8</td>\n",
-       "        <td>113.0</td>\n",
-       "        <td>3.85</td>\n",
-       "        <td>3.49</td>\n",
-       "        <td>0.24</td>\n",
-       "        <td>2.18</td>\n",
-       "        <td>7.8</td>\n",
-       "        <td>0.86</td>\n",
-       "        <td>3.45</td>\n",
-       "        <td>1480.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>13.24</td>\n",
-       "        <td>2.59</td>\n",
-       "        <td>2.87</td>\n",
-       "        <td>21.0</td>\n",
-       "        <td>118.0</td>\n",
-       "        <td>2.8</td>\n",
-       "        <td>2.69</td>\n",
-       "        <td>0.39</td>\n",
-       "        <td>1.82</td>\n",
-       "        <td>4.32</td>\n",
-       "        <td>1.04</td>\n",
-       "        <td>2.93</td>\n",
-       "        <td>735.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>14.2</td>\n",
-       "        <td>1.76</td>\n",
-       "        <td>2.45</td>\n",
-       "        <td>15.2</td>\n",
-       "        <td>112.0</td>\n",
-       "        <td>3.27</td>\n",
-       "        <td>3.39</td>\n",
-       "        <td>0.34</td>\n",
-       "        <td>1.97</td>\n",
-       "        <td>6.75</td>\n",
-       "        <td>1.05</td>\n",
-       "        <td>2.85</td>\n",
-       "        <td>1450.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>7</td>\n",
-       "        <td>14.39</td>\n",
-       "        <td>1.87</td>\n",
-       "        <td>2.45</td>\n",
-       "        <td>14.6</td>\n",
-       "        <td>96.0</td>\n",
-       "        <td>2.5</td>\n",
-       "        <td>2.52</td>\n",
-       "        <td>0.3</td>\n",
-       "        <td>1.98</td>\n",
-       "        <td>5.25</td>\n",
-       "        <td>1.02</td>\n",
-       "        <td>3.58</td>\n",
-       "        <td>1290.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>8</td>\n",
-       "        <td>14.06</td>\n",
-       "        <td>2.15</td>\n",
-       "        <td>2.61</td>\n",
-       "        <td>17.6</td>\n",
-       "        <td>121.0</td>\n",
-       "        <td>2.6</td>\n",
-       "        <td>2.51</td>\n",
-       "        <td>0.31</td>\n",
-       "        <td>1.25</td>\n",
-       "        <td>5.05</td>\n",
-       "        <td>1.06</td>\n",
-       "        <td>3.58</td>\n",
-       "        <td>1295.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>14.83</td>\n",
-       "        <td>1.64</td>\n",
-       "        <td>2.17</td>\n",
-       "        <td>14.0</td>\n",
-       "        <td>97.0</td>\n",
-       "        <td>2.8</td>\n",
-       "        <td>2.98</td>\n",
-       "        <td>0.29</td>\n",
-       "        <td>1.98</td>\n",
-       "        <td>5.2</td>\n",
-       "        <td>1.08</td>\n",
-       "        <td>2.85</td>\n",
-       "        <td>1045.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>13.86</td>\n",
-       "        <td>1.35</td>\n",
-       "        <td>2.27</td>\n",
-       "        <td>16.0</td>\n",
-       "        <td>98.0</td>\n",
-       "        <td>2.98</td>\n",
-       "        <td>3.15</td>\n",
-       "        <td>0.22</td>\n",
-       "        <td>1.85</td>\n",
-       "        <td>7.2199</td>\n",
-       "        <td>1.01</td>\n",
-       "        <td>3.55</td>\n",
-       "        <td>1045.0</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, 14.23, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 5.64, 
1.04, 3.92, 1065.0),\n",
-       " (2, 13.2, 1.78, 2.14, 11.2, 1.0, 2.65, 2.76, 0.26, 1.28, 4.38, 1.05, 
3.49, 1050.0),\n",
-       " (3, 13.16, 2.36, 2.67, 18.6, 101.0, 2.8, 3.24, 0.3, 2.81, 5.6799, 
1.03, 3.17, 1185.0),\n",
-       " (4, 14.37, 1.95, 2.5, 16.8, 113.0, 3.85, 3.49, 0.24, 2.18, 7.8, 0.86, 
3.45, 1480.0),\n",
-       " (5, 13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 4.32, 
1.04, 2.93, 735.0),\n",
-       " (6, 14.2, 1.76, 2.45, 15.2, 112.0, 3.27, 3.39, 0.34, 1.97, 6.75, 
1.05, 2.85, 1450.0),\n",
-       " (7, 14.39, 1.87, 2.45, 14.6, 96.0, 2.5, 2.52, 0.3, 1.98, 5.25, 1.02, 
3.58, 1290.0),\n",
-       " (8, 14.06, 2.15, 2.61, 17.6, 121.0, 2.6, 2.51, 0.31, 1.25, 5.05, 
1.06, 3.58, 1295.0),\n",
-       " (9, 14.83, 1.64, 2.17, 14.0, 97.0, 2.8, 2.98, 0.29, 1.98, 5.2, 1.08, 
2.85, 1045.0),\n",
-       " (10, 13.86, 1.35, 2.27, 16.0, 98.0, 2.98, 3.15, 0.22, 1.85, 7.2199, 
1.01, 3.55, 1045.0)]"
-      ]
-     },
-     "execution_count": 64,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS km_arrayin CASCADE;\n",
-    "\n",
-    "CREATE TABLE km_arrayin(pid int, \n",
-    "                        p1 float, \n",
-    "                        p2 float, \n",
-    "                        p3 float,\n",
-    "                        p4 float, \n",
-    "                        p5 float, \n",
-    "                        p6 float,\n",
-    "                        p7 float, \n",
-    "                        p8 float, \n",
-    "                        p9 float,\n",
-    "                        p10 float, \n",
-    "                        p11 float, \n",
-    "                        p12 float,\n",
-    "                        p13 float);\n",
-    "\n",
-    "\n",
-    "INSERT INTO km_arrayin VALUES\n",
-    "(1,  14.23, 1.71, 2.43, 15.6, 127, 2.8, 3.0600, 0.2800, 2.29, 5.64, 1.04, 
3.92, 1065),\n",
-    "(2,  13.2, 1.78, 2.14, 11.2, 1, 2.65, 2.76, 0.26, 1.28, 4.38, 1.05, 3.49, 
1050),\n",
-    "(3,  13.16, 2.36,  2.67, 18.6, 101, 2.8,  3.24, 0.3, 2.81, 5.6799, 1.03, 
3.17, 1185),\n",
-    "(4,  14.37, 1.95, 2.5, 16.8, 113, 3.85, 3.49, 0.24, 2.18, 7.8, 0.86, 
3.45, 1480),\n",
-    "(5,  13.24, 2.59, 2.87, 21, 118, 2.8, 2.69, 0.39, 1.82, 4.32, 1.04, 2.93, 
735),\n",
-    "(6,  14.2, 1.76, 2.45, 15.2, 112, 3.27, 3.39, 0.34, 1.97, 6.75, 1.05, 
2.85, 1450),\n",
-    "(7,  14.39, 1.87, 2.45, 14.6, 96, 2.5, 2.52, 0.3, 1.98, 5.25, 1.02, 3.58, 
1290),\n",
-    "(8,  14.06, 2.15, 2.61, 17.6, 121, 2.6, 2.51, 0.31, 1.25, 5.05, 1.06, 
3.58, 1295),\n",
-    "(9,  14.83, 1.64, 2.17, 14, 97, 2.8, 2.98, 0.29, 1.98, 5.2, 1.08, 2.85, 
1045),\n",
-    "(10, 13.86, 1.35, 2.27, 16, 98, 2.98, 3.15, 0.22, 1.8500, 7.2199, 1.01, 
3.55, 1045);\n",
-    "\n",
-    "SELECT * FROM km_arrayin ORDER BY pid;"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 67,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "1 rows affected.\n",
-      "10 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>pid</th>\n",
-       "        <th>p1</th>\n",
-       "        <th>p2</th>\n",
-       "        <th>p3</th>\n",
-       "        <th>p4</th>\n",
-       "        <th>p5</th>\n",
-       "        <th>p6</th>\n",
-       "        <th>p7</th>\n",
-       "        <th>p8</th>\n",
-       "        <th>p9</th>\n",
-       "        <th>p10</th>\n",
-       "        <th>p11</th>\n",
-       "        <th>p12</th>\n",
-       "        <th>p13</th>\n",
-       "        <th>cluster_id</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>14.23</td>\n",
-       "        <td>1.71</td>\n",
-       "        <td>2.43</td>\n",
-       "        <td>15.6</td>\n",
-       "        <td>127.0</td>\n",
-       "        <td>2.8</td>\n",
-       "        <td>3.06</td>\n",
-       "        <td>0.28</td>\n",
-       "        <td>2.29</td>\n",
-       "        <td>5.64</td>\n",
-       "        <td>1.04</td>\n",
-       "        <td>3.92</td>\n",
-       "        <td>1065.0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>13.2</td>\n",
-       "        <td>1.78</td>\n",
-       "        <td>2.14</td>\n",
-       "        <td>11.2</td>\n",
-       "        <td>1.0</td>\n",
-       "        <td>2.65</td>\n",
-       "        <td>2.76</td>\n",
-       "        <td>0.26</td>\n",
-       "        <td>1.28</td>\n",
-       "        <td>4.38</td>\n",
-       "        <td>1.05</td>\n",
-       "        <td>3.49</td>\n",
-       "        <td>1050.0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>13.16</td>\n",
-       "        <td>2.36</td>\n",
-       "        <td>2.67</td>\n",
-       "        <td>18.6</td>\n",
-       "        <td>101.0</td>\n",
-       "        <td>2.8</td>\n",
-       "        <td>3.24</td>\n",
-       "        <td>0.3</td>\n",
-       "        <td>2.81</td>\n",
-       "        <td>5.6799</td>\n",
-       "        <td>1.03</td>\n",
-       "        <td>3.17</td>\n",
-       "        <td>1185.0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>4</td>\n",
-       "        <td>14.37</td>\n",
-       "        <td>1.95</td>\n",
-       "        <td>2.5</td>\n",
-       "        <td>16.8</td>\n",
-       "        <td>113.0</td>\n",
-       "        <td>3.85</td>\n",
-       "        <td>3.49</td>\n",
-       "        <td>0.24</td>\n",
-       "        <td>2.18</td>\n",
-       "        <td>7.8</td>\n",
-       "        <td>0.86</td>\n",
-       "        <td>3.45</td>\n",
-       "        <td>1480.0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>13.24</td>\n",
-       "        <td>2.59</td>\n",
-       "        <td>2.87</td>\n",
-       "        <td>21.0</td>\n",
-       "        <td>118.0</td>\n",
-       "        <td>2.8</td>\n",
-       "        <td>2.69</td>\n",
-       "        <td>0.39</td>\n",
-       "        <td>1.82</td>\n",
-       "        <td>4.32</td>\n",
-       "        <td>1.04</td>\n",
-       "        <td>2.93</td>\n",
-       "        <td>735.0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>14.2</td>\n",
-       "        <td>1.76</td>\n",
-       "        <td>2.45</td>\n",
-       "        <td>15.2</td>\n",
-       "        <td>112.0</td>\n",
-       "        <td>3.27</td>\n",
-       "        <td>3.39</td>\n",
-       "        <td>0.34</td>\n",
-       "        <td>1.97</td>\n",
-       "        <td>6.75</td>\n",
-       "        <td>1.05</td>\n",
-       "        <td>2.85</td>\n",
-       "        <td>1450.0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>7</td>\n",
-       "        <td>14.39</td>\n",
-       "        <td>1.87</td>\n",
-       "        <td>2.45</td>\n",
-       "        <td>14.6</td>\n",
-       "        <td>96.0</td>\n",
-       "        <td>2.5</td>\n",
-       "        <td>2.52</td>\n",
-       "        <td>0.3</td>\n",
-       "        <td>1.98</td>\n",
-       "        <td>5.25</td>\n",
-       "        <td>1.02</td>\n",
-       "        <td>3.58</td>\n",
-       "        <td>1290.0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>8</td>\n",
-       "        <td>14.06</td>\n",
-       "        <td>2.15</td>\n",
-       "        <td>2.61</td>\n",
-       "        <td>17.6</td>\n",
-       "        <td>121.0</td>\n",
-       "        <td>2.6</td>\n",
-       "        <td>2.51</td>\n",
-       "        <td>0.31</td>\n",
-       "        <td>1.25</td>\n",
-       "        <td>5.05</td>\n",
-       "        <td>1.06</td>\n",
-       "        <td>3.58</td>\n",
-       "        <td>1295.0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>14.83</td>\n",
-       "        <td>1.64</td>\n",
-       "        <td>2.17</td>\n",
-       "        <td>14.0</td>\n",
-       "        <td>97.0</td>\n",
-       "        <td>2.8</td>\n",
-       "        <td>2.98</td>\n",
-       "        <td>0.29</td>\n",
-       "        <td>1.98</td>\n",
-       "        <td>5.2</td>\n",
-       "        <td>1.08</td>\n",
-       "        <td>2.85</td>\n",
-       "        <td>1045.0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>13.86</td>\n",
-       "        <td>1.35</td>\n",
-       "        <td>2.27</td>\n",
-       "        <td>16.0</td>\n",
-       "        <td>98.0</td>\n",
-       "        <td>2.98</td>\n",
-       "        <td>3.15</td>\n",
-       "        <td>0.22</td>\n",
-       "        <td>1.85</td>\n",
-       "        <td>7.2199</td>\n",
-       "        <td>1.01</td>\n",
-       "        <td>3.55</td>\n",
-       "        <td>1045.0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, 14.23, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 5.64, 
1.04, 3.92, 1065.0, 1),\n",
-       " (2, 13.2, 1.78, 2.14, 11.2, 1.0, 2.65, 2.76, 0.26, 1.28, 4.38, 1.05, 
3.49, 1050.0, 1),\n",
-       " (3, 13.16, 2.36, 2.67, 18.6, 101.0, 2.8, 3.24, 0.3, 2.81, 5.6799, 
1.03, 3.17, 1185.0, 1),\n",
-       " (4, 14.37, 1.95, 2.5, 16.8, 113.0, 3.85, 3.49, 0.24, 2.18, 7.8, 0.86, 
3.45, 1480.0, 0),\n",
-       " (5, 13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 4.32, 
1.04, 2.93, 735.0, 1),\n",
-       " (6, 14.2, 1.76, 2.45, 15.2, 112.0, 3.27, 3.39, 0.34, 1.97, 6.75, 
1.05, 2.85, 1450.0, 0),\n",
-       " (7, 14.39, 1.87, 2.45, 14.6, 96.0, 2.5, 2.52, 0.3, 1.98, 5.25, 1.02, 
3.58, 1290.0, 0),\n",
-       " (8, 14.06, 2.15, 2.61, 17.6, 121.0, 2.6, 2.51, 0.31, 1.25, 5.05, 
1.06, 3.58, 1295.0, 0),\n",
-       " (9, 14.83, 1.64, 2.17, 14.0, 97.0, 2.8, 2.98, 0.29, 1.98, 5.2, 1.08, 
2.85, 1045.0, 1),\n",
-       " (10, 13.86, 1.35, 2.27, 16.0, 98.0, 2.98, 3.15, 0.22, 1.85, 7.2199, 
1.01, 3.55, 1045.0, 1)]"
-      ]
-     },
-     "execution_count": 67,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS km_result;\n",
-    "\n",
-    "-- Run kmeans algorithm\n",
-    "CREATE TABLE km_result AS\n",
-    "SELECT * FROM madlib.kmeans_random('km_arrayin', \n",
-    "                                'ARRAY[p1, p2, p3, p4, p5, p6, \n",
-    "                                      p7, p8, p9, p10, p11, p12, p13]', 
\n",
-    "                                2,\n",
-    "                                'madlib.squared_dist_norm2',\n",
-    "                                'madlib.avg', \n",
-    "                                20, \n",
-    "                                0.001);\n",
-    "\n",
-    "-- Get point assignment\n",
-    "SELECT data.*,  (madlib.closest_column(centroids, \n",
-    "                                       ARRAY[p1, p2, p3, p4, p5, p6, \n",
-    "                                      p7, p8, p9, p10, p11, p12, 
p13])).column_id as cluster_id\n",
-    "FROM km_arrayin as data, km_result\n",
-    "ORDER BY data.pid;"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/eaa6ce9d/community-artifacts/Kmeans-v2.ipynb
----------------------------------------------------------------------
diff --git a/community-artifacts/Kmeans-v2.ipynb 
b/community-artifacts/Kmeans-v2.ipynb
new file mode 100644
index 0000000..6d3e40e
--- /dev/null
+++ b/community-artifacts/Kmeans-v2.ipynb
@@ -0,0 +1,1102 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# K-means (MADlib v1.11+)\n",
+    "Demonstrates k-means including new array input in 1.10 and new array 
unnest function in 1.11."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The sql extension is already loaded. To reload it, use:\n",
+      "  %reload_ext sql\n"
+     ]
+    }
+   ],
+   "source": [
+    "%load_ext sql"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "u'Connected: gpdbchina@madlib'"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Greenplum 4.3.10.0\n",
+    "%sql postgresql://gpdbchina@10.194.10.68:61000/madlib\n",
+    "        \n",
+    "# PostgreSQL local\n",
+    "#%sql postgresql://fmcquillan@localhost:5432/madlib\n",
+    "\n",
+    "# Greenplum 4.2.3.0\n",
+    "#%sql postgresql://gpdbchina@10.194.10.68:55000/madlib"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>version</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>MADlib version: 1.11-dev, git revision: 
rel/v1.10.0-26-ga3d54be, cmake configuration time: Thu Apr 27 01:01:30 UTC 
2017, build type: Release, build system: 
Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: 
g++ 4.4.0</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(u'MADlib version: 1.11-dev, git revision: rel/v1.10.0-26-ga3d54be, 
cmake configuration time: Thu Apr 27 01:01:30 UTC 2017, build type: Release, 
build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, 
C++ compiler: g++ 4.4.0',)]"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%sql select madlib.version();\n",
+    "#%sql select version();"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Prepare some input data:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "Done.\n",
+      "10 rows affected.\n",
+      "10 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>pid</th>\n",
+       "        <th>points</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>[14.23, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 
5.64, 1.04, 3.92, 1065.0]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>[13.2, 1.78, 2.14, 11.2, 1.0, 2.65, 2.76, 0.26, 1.28, 
4.38, 1.05, 3.49, 1050.0]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>[13.16, 2.36, 2.67, 18.6, 101.0, 2.8, 3.24, 0.3, 2.81, 
5.6799, 1.03, 3.17, 1185.0]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>4</td>\n",
+       "        <td>[14.37, 1.95, 2.5, 16.8, 113.0, 3.85, 3.49, 0.24, 2.18, 
7.8, 0.86, 3.45, 1480.0]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>5</td>\n",
+       "        <td>[13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 
4.32, 1.04, 2.93, 735.0]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>6</td>\n",
+       "        <td>[14.2, 1.76, 2.45, 15.2, 112.0, 3.27, 3.39, 0.34, 1.97, 
6.75, 1.05, 2.85, 1450.0]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>7</td>\n",
+       "        <td>[14.39, 1.87, 2.45, 14.6, 96.0, 2.5, 2.52, 0.3, 1.98, 
5.25, 1.02, 3.58, 1290.0]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>8</td>\n",
+       "        <td>[14.06, 2.15, 2.61, 17.6, 121.0, 2.6, 2.51, 0.31, 1.25, 
5.05, 1.06, 3.58, 1295.0]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>9</td>\n",
+       "        <td>[14.83, 1.64, 2.17, 14.0, 97.0, 2.8, 2.98, 0.29, 1.98, 
5.2, 1.08, 2.85, 1045.0]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>10</td>\n",
+       "        <td>[13.86, 1.35, 2.27, 16.0, 98.0, 2.98, 3.15, 0.22, 1.85, 
7.2199, 1.01, 3.55, 1045.0]</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(1, [14.23, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 5.64, 
1.04, 3.92, 1065.0]),\n",
+       " (2, [13.2, 1.78, 2.14, 11.2, 1.0, 2.65, 2.76, 0.26, 1.28, 4.38, 1.05, 
3.49, 1050.0]),\n",
+       " (3, [13.16, 2.36, 2.67, 18.6, 101.0, 2.8, 3.24, 0.3, 2.81, 5.6799, 
1.03, 3.17, 1185.0]),\n",
+       " (4, [14.37, 1.95, 2.5, 16.8, 113.0, 3.85, 3.49, 0.24, 2.18, 7.8, 
0.86, 3.45, 1480.0]),\n",
+       " (5, [13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 4.32, 
1.04, 2.93, 735.0]),\n",
+       " (6, [14.2, 1.76, 2.45, 15.2, 112.0, 3.27, 3.39, 0.34, 1.97, 6.75, 
1.05, 2.85, 1450.0]),\n",
+       " (7, [14.39, 1.87, 2.45, 14.6, 96.0, 2.5, 2.52, 0.3, 1.98, 5.25, 1.02, 
3.58, 1290.0]),\n",
+       " (8, [14.06, 2.15, 2.61, 17.6, 121.0, 2.6, 2.51, 0.31, 1.25, 5.05, 
1.06, 3.58, 1295.0]),\n",
+       " (9, [14.83, 1.64, 2.17, 14.0, 97.0, 2.8, 2.98, 0.29, 1.98, 5.2, 1.08, 
2.85, 1045.0]),\n",
+       " (10, [13.86, 1.35, 2.27, 16.0, 98.0, 2.98, 3.15, 0.22, 1.85, 7.2199, 
1.01, 3.55, 1045.0])]"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS km_sample;\n",
+    "\n",
+    "CREATE TABLE km_sample(pid int, points double precision[]);\n",
+    "\n",
+    "INSERT INTO km_sample VALUES\n",
+    "(1,  '{14.23, 1.71, 2.43, 15.6, 127, 2.8, 3.0600, 0.2800, 2.29, 5.64, 
1.04, 3.92, 1065}'),\n",
+    "(2,  '{13.2, 1.78, 2.14, 11.2, 1, 2.65, 2.76, 0.26, 1.28, 4.38, 1.05, 
3.49, 1050}'),\n",
+    "(3,  '{13.16, 2.36,  2.67, 18.6, 101, 2.8,  3.24, 0.3, 2.81, 5.6799, 
1.03, 3.17, 1185}'),\n",
+    "(4,  '{14.37, 1.95, 2.5, 16.8, 113, 3.85, 3.49, 0.24, 2.18, 7.8, 0.86, 
3.45, 1480}'),\n",
+    "(5,  '{13.24, 2.59, 2.87, 21, 118, 2.8, 2.69, 0.39, 1.82, 4.32, 1.04, 
2.93, 735}'),\n",
+    "(6,  '{14.2, 1.76, 2.45, 15.2, 112, 3.27, 3.39, 0.34, 1.97, 6.75, 1.05, 
2.85, 1450}'),\n",
+    "(7,  '{14.39, 1.87, 2.45, 14.6, 96, 2.5, 2.52, 0.3, 1.98, 5.25, 1.02, 
3.58, 1290}'),\n",
+    "(8,  '{14.06, 2.15, 2.61, 17.6, 121, 2.6, 2.51, 0.31, 1.25, 5.05, 1.06, 
3.58, 1295}'),\n",
+    "(9,  '{14.83, 1.64, 2.17, 14, 97, 2.8, 2.98, 0.29, 1.98, 5.2, 1.08, 2.85, 
1045}'),\n",
+    "(10, '{13.86, 1.35, 2.27, 16, 98, 2.98, 3.15, 0.22, 1.8500, 7.2199, 1.01, 
3.55, 1045}');\n",
+    "\n",
+    "SELECT * FROM km_sample ORDER BY pid;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Run k-means clustering using kmeans++ with centroid seeding:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>centroids</th>\n",
+       "        <th>cluster_variance</th>\n",
+       "        <th>objective_fn</th>\n",
+       "        <th>frac_reassigned</th>\n",
+       "        <th>num_iterations</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>[[13.872, 1.814, 2.376, 15.56, 88.2, 2.806, 2.928, 0.288, 
1.844, 5.35198, 1.044, 3.348, 988.0], [14.036, 2.018, 2.536, 16.56, 108.6, 
3.004, 3.03, 0.298, 2.038, 6.10598, 1.004, 3.326, 1340.0]]</td>\n",
+       "        <td>[90512.324426408, 60672.638245208]</td>\n",
+       "        <td>151184.962672</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[([[13.872, 1.814, 2.376, 15.56, 88.2, 2.806, 2.928, 0.288, 1.844, 
5.35198, 1.044, 3.348, 988.0], [14.036, 2.018, 2.536, 16.56, 108.6, 3.004, 
3.03, 0.298, 2.038, 6.10598, 1.004, 3.326, 1340.0]], [90512.324426408, 
60672.638245208], 151184.962671616, 0.0, 2)]"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS km_result;\n",
+    "\n",
+    "-- Run kmeans algorithm\n",
+    "CREATE TABLE km_result AS\n",
+    "SELECT * FROM madlib.kmeanspp( 'km_sample',   -- Table of source data\n",
+    "                               'points',      -- Column containing point 
co-ordinates \n",
+    "                               2,             -- Number of centroids to 
calculate\n",
+    "                               'madlib.squared_dist_norm2',   -- Distance 
function\n",
+    "                               'madlib.avg',  -- Aggregate function\n",
+    "                               20,            -- Number of iterations\n",
+    "                               0.001          -- Fraction of centroids 
reassigned to keep iterating \n",
+    "                             );\n",
+    "\n",
+    "SELECT * FROM km_result;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Calculate the simplified silhouette coefficient:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>simple_silhouette</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0.689788048829</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(0.68978804882941,)]"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "SELECT * FROM madlib.simple_silhouette( 'km_sample',          -- Input 
points table\n",
+    "                                        'points',             -- Column 
containing points\n",
+    "                                        (SELECT centroids FROM 
km_result),  -- Centroids\n",
+    "                                        'madlib.dist_norm2'   -- Distance 
function\n",
+    "                                      );"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Find the cluster assignment for each point:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "10 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>pid</th>\n",
+       "        <th>points</th>\n",
+       "        <th>cluster_id</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>[14.23, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 
5.64, 1.04, 3.92, 1065.0]</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>[13.2, 1.78, 2.14, 11.2, 1.0, 2.65, 2.76, 0.26, 1.28, 
4.38, 1.05, 3.49, 1050.0]</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>[13.16, 2.36, 2.67, 18.6, 101.0, 2.8, 3.24, 0.3, 2.81, 
5.6799, 1.03, 3.17, 1185.0]</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>4</td>\n",
+       "        <td>[14.37, 1.95, 2.5, 16.8, 113.0, 3.85, 3.49, 0.24, 2.18, 
7.8, 0.86, 3.45, 1480.0]</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>5</td>\n",
+       "        <td>[13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 
4.32, 1.04, 2.93, 735.0]</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>6</td>\n",
+       "        <td>[14.2, 1.76, 2.45, 15.2, 112.0, 3.27, 3.39, 0.34, 1.97, 
6.75, 1.05, 2.85, 1450.0]</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>7</td>\n",
+       "        <td>[14.39, 1.87, 2.45, 14.6, 96.0, 2.5, 2.52, 0.3, 1.98, 
5.25, 1.02, 3.58, 1290.0]</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>8</td>\n",
+       "        <td>[14.06, 2.15, 2.61, 17.6, 121.0, 2.6, 2.51, 0.31, 1.25, 
5.05, 1.06, 3.58, 1295.0]</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>9</td>\n",
+       "        <td>[14.83, 1.64, 2.17, 14.0, 97.0, 2.8, 2.98, 0.29, 1.98, 
5.2, 1.08, 2.85, 1045.0]</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>10</td>\n",
+       "        <td>[13.86, 1.35, 2.27, 16.0, 98.0, 2.98, 3.15, 0.22, 1.85, 
7.2199, 1.01, 3.55, 1045.0]</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(1, [14.23, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 5.64, 
1.04, 3.92, 1065.0], 0),\n",
+       " (2, [13.2, 1.78, 2.14, 11.2, 1.0, 2.65, 2.76, 0.26, 1.28, 4.38, 1.05, 
3.49, 1050.0], 0),\n",
+       " (3, [13.16, 2.36, 2.67, 18.6, 101.0, 2.8, 3.24, 0.3, 2.81, 5.6799, 
1.03, 3.17, 1185.0], 1),\n",
+       " (4, [14.37, 1.95, 2.5, 16.8, 113.0, 3.85, 3.49, 0.24, 2.18, 7.8, 
0.86, 3.45, 1480.0], 1),\n",
+       " (5, [13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 4.32, 
1.04, 2.93, 735.0], 0),\n",
+       " (6, [14.2, 1.76, 2.45, 15.2, 112.0, 3.27, 3.39, 0.34, 1.97, 6.75, 
1.05, 2.85, 1450.0], 1),\n",
+       " (7, [14.39, 1.87, 2.45, 14.6, 96.0, 2.5, 2.52, 0.3, 1.98, 5.25, 1.02, 
3.58, 1290.0], 1),\n",
+       " (8, [14.06, 2.15, 2.61, 17.6, 121.0, 2.6, 2.51, 0.31, 1.25, 5.05, 
1.06, 3.58, 1295.0], 1),\n",
+       " (9, [14.83, 1.64, 2.17, 14.0, 97.0, 2.8, 2.98, 0.29, 1.98, 5.2, 1.08, 
2.85, 1045.0], 0),\n",
+       " (10, [13.86, 1.35, 2.27, 16.0, 98.0, 2.98, 3.15, 0.22, 1.85, 7.2199, 
1.01, 3.55, 1045.0], 0)]"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "SELECT data.*,  (madlib.closest_column(centroids, points)).column_id as 
cluster_id\n",
+    "FROM km_sample as data, km_result\n",
+    "ORDER BY data.pid;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Unnest the cluster centroids 2-D array to get a set of 1-D centroid 
arrays"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "2 rows affected.\n",
+      "2 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>unnest_row_id</th>\n",
+       "        <th>unnest_result</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>[13.872, 1.814, 2.376, 15.56, 88.2, 2.806, 2.928, 0.288, 
1.844, 5.35198, 1.044, 3.348, 988.0]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>[14.036, 2.018, 2.536, 16.56, 108.6, 3.004, 3.03, 0.298, 
2.038, 6.10598, 1.004, 3.326, 1340.0]</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(1, [13.872, 1.814, 2.376, 15.56, 88.2, 2.806, 2.928, 0.288, 1.844, 
5.35198, 1.044, 3.348, 988.0]),\n",
+       " (2, [14.036, 2.018, 2.536, 16.56, 108.6, 3.004, 3.03, 0.298, 2.038, 
6.10598, 1.004, 3.326, 1340.0])]"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS km_centroids_unnest;\n",
+    "\n",
+    "-- Run unnest function\n",
+    "CREATE TABLE km_centroids_unnest AS\n",
+    "SELECT (madlib.array_unnest_2d_to_1d(centroids)).*\n",
+    "FROM km_result;\n",
+    "\n",
+    "SELECT * FROM km_centroids_unnest ORDER BY 1;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that the ID column returned by array_unnest_2d_to_1d() is not the 
same as the cluster ID assigned by k-means. See below to display the cluster 
IDs."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6.  Display cluster ID\n",
+    "Create cluster IDs for 1-D centroid arrays so that cluster ID for any 
centroid can be matched to the cluster assignment for the data points:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>unnest_row_id</th>\n",
+       "        <th>unnest_result</th>\n",
+       "        <th>cluster_id</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>[13.872, 1.814, 2.376, 15.56, 88.2, 2.806, 2.928, 0.288, 
1.844, 5.35198, 1.044, 3.348, 988.0]</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>[14.036, 2.018, 2.536, 16.56, 108.6, 3.004, 3.03, 0.298, 
2.038, 6.10598, 1.004, 3.326, 1340.0]</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(1, [13.872, 1.814, 2.376, 15.56, 88.2, 2.806, 2.928, 0.288, 1.844, 
5.35198, 1.044, 3.348, 988.0], 0),\n",
+       " (2, [14.036, 2.018, 2.536, 16.56, 108.6, 3.004, 3.03, 0.298, 2.038, 
6.10598, 1.004, 3.326, 1340.0], 1)]"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "SELECT cent.*,  (madlib.closest_column(centroids, 
unnest_result)).column_id as cluster_id\n",
+    "FROM km_centroids_unnest as cent, km_result\n",
+    "ORDER BY cent.unnest_row_id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 7. Array input"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "Done.\n",
+      "10 rows affected.\n",
+      "10 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>pid</th>\n",
+       "        <th>p1</th>\n",
+       "        <th>p2</th>\n",
+       "        <th>p3</th>\n",
+       "        <th>p4</th>\n",
+       "        <th>p5</th>\n",
+       "        <th>p6</th>\n",
+       "        <th>p7</th>\n",
+       "        <th>p8</th>\n",
+       "        <th>p9</th>\n",
+       "        <th>p10</th>\n",
+       "        <th>p11</th>\n",
+       "        <th>p12</th>\n",
+       "        <th>p13</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>14.23</td>\n",
+       "        <td>1.71</td>\n",
+       "        <td>2.43</td>\n",
+       "        <td>15.6</td>\n",
+       "        <td>127.0</td>\n",
+       "        <td>2.8</td>\n",
+       "        <td>3.06</td>\n",
+       "        <td>0.28</td>\n",
+       "        <td>2.29</td>\n",
+       "        <td>5.64</td>\n",
+       "        <td>1.04</td>\n",
+       "        <td>3.92</td>\n",
+       "        <td>1065.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>13.2</td>\n",
+       "        <td>1.78</td>\n",
+       "        <td>2.14</td>\n",
+       "        <td>11.2</td>\n",
+       "        <td>1.0</td>\n",
+       "        <td>2.65</td>\n",
+       "        <td>2.76</td>\n",
+       "        <td>0.26</td>\n",
+       "        <td>1.28</td>\n",
+       "        <td>4.38</td>\n",
+       "        <td>1.05</td>\n",
+       "        <td>3.49</td>\n",
+       "        <td>1050.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>13.16</td>\n",
+       "        <td>2.36</td>\n",
+       "        <td>2.67</td>\n",
+       "        <td>18.6</td>\n",
+       "        <td>101.0</td>\n",
+       "        <td>2.8</td>\n",
+       "        <td>3.24</td>\n",
+       "        <td>0.3</td>\n",
+       "        <td>2.81</td>\n",
+       "        <td>5.6799</td>\n",
+       "        <td>1.03</td>\n",
+       "        <td>3.17</td>\n",
+       "        <td>1185.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>4</td>\n",
+       "        <td>14.37</td>\n",
+       "        <td>1.95</td>\n",
+       "        <td>2.5</td>\n",
+       "        <td>16.8</td>\n",
+       "        <td>113.0</td>\n",
+       "        <td>3.85</td>\n",
+       "        <td>3.49</td>\n",
+       "        <td>0.24</td>\n",
+       "        <td>2.18</td>\n",
+       "        <td>7.8</td>\n",
+       "        <td>0.86</td>\n",
+       "        <td>3.45</td>\n",
+       "        <td>1480.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>5</td>\n",
+       "        <td>13.24</td>\n",
+       "        <td>2.59</td>\n",
+       "        <td>2.87</td>\n",
+       "        <td>21.0</td>\n",
+       "        <td>118.0</td>\n",
+       "        <td>2.8</td>\n",
+       "        <td>2.69</td>\n",
+       "        <td>0.39</td>\n",
+       "        <td>1.82</td>\n",
+       "        <td>4.32</td>\n",
+       "        <td>1.04</td>\n",
+       "        <td>2.93</td>\n",
+       "        <td>735.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>6</td>\n",
+       "        <td>14.2</td>\n",
+       "        <td>1.76</td>\n",
+       "        <td>2.45</td>\n",
+       "        <td>15.2</td>\n",
+       "        <td>112.0</td>\n",
+       "        <td>3.27</td>\n",
+       "        <td>3.39</td>\n",
+       "        <td>0.34</td>\n",
+       "        <td>1.97</td>\n",
+       "        <td>6.75</td>\n",
+       "        <td>1.05</td>\n",
+       "        <td>2.85</td>\n",
+       "        <td>1450.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>7</td>\n",
+       "        <td>14.39</td>\n",
+       "        <td>1.87</td>\n",
+       "        <td>2.45</td>\n",
+       "        <td>14.6</td>\n",
+       "        <td>96.0</td>\n",
+       "        <td>2.5</td>\n",
+       "        <td>2.52</td>\n",
+       "        <td>0.3</td>\n",
+       "        <td>1.98</td>\n",
+       "        <td>5.25</td>\n",
+       "        <td>1.02</td>\n",
+       "        <td>3.58</td>\n",
+       "        <td>1290.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>8</td>\n",
+       "        <td>14.06</td>\n",
+       "        <td>2.15</td>\n",
+       "        <td>2.61</td>\n",
+       "        <td>17.6</td>\n",
+       "        <td>121.0</td>\n",
+       "        <td>2.6</td>\n",
+       "        <td>2.51</td>\n",
+       "        <td>0.31</td>\n",
+       "        <td>1.25</td>\n",
+       "        <td>5.05</td>\n",
+       "        <td>1.06</td>\n",
+       "        <td>3.58</td>\n",
+       "        <td>1295.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>9</td>\n",
+       "        <td>14.83</td>\n",
+       "        <td>1.64</td>\n",
+       "        <td>2.17</td>\n",
+       "        <td>14.0</td>\n",
+       "        <td>97.0</td>\n",
+       "        <td>2.8</td>\n",
+       "        <td>2.98</td>\n",
+       "        <td>0.29</td>\n",
+       "        <td>1.98</td>\n",
+       "        <td>5.2</td>\n",
+       "        <td>1.08</td>\n",
+       "        <td>2.85</td>\n",
+       "        <td>1045.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>10</td>\n",
+       "        <td>13.86</td>\n",
+       "        <td>1.35</td>\n",
+       "        <td>2.27</td>\n",
+       "        <td>16.0</td>\n",
+       "        <td>98.0</td>\n",
+       "        <td>2.98</td>\n",
+       "        <td>3.15</td>\n",
+       "        <td>0.22</td>\n",
+       "        <td>1.85</td>\n",
+       "        <td>7.2199</td>\n",
+       "        <td>1.01</td>\n",
+       "        <td>3.55</td>\n",
+       "        <td>1045.0</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(1, 14.23, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 5.64, 
1.04, 3.92, 1065.0),\n",
+       " (2, 13.2, 1.78, 2.14, 11.2, 1.0, 2.65, 2.76, 0.26, 1.28, 4.38, 1.05, 
3.49, 1050.0),\n",
+       " (3, 13.16, 2.36, 2.67, 18.6, 101.0, 2.8, 3.24, 0.3, 2.81, 5.6799, 
1.03, 3.17, 1185.0),\n",
+       " (4, 14.37, 1.95, 2.5, 16.8, 113.0, 3.85, 3.49, 0.24, 2.18, 7.8, 0.86, 
3.45, 1480.0),\n",
+       " (5, 13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 4.32, 
1.04, 2.93, 735.0),\n",
+       " (6, 14.2, 1.76, 2.45, 15.2, 112.0, 3.27, 3.39, 0.34, 1.97, 6.75, 
1.05, 2.85, 1450.0),\n",
+       " (7, 14.39, 1.87, 2.45, 14.6, 96.0, 2.5, 2.52, 0.3, 1.98, 5.25, 1.02, 
3.58, 1290.0),\n",
+       " (8, 14.06, 2.15, 2.61, 17.6, 121.0, 2.6, 2.51, 0.31, 1.25, 5.05, 
1.06, 3.58, 1295.0),\n",
+       " (9, 14.83, 1.64, 2.17, 14.0, 97.0, 2.8, 2.98, 0.29, 1.98, 5.2, 1.08, 
2.85, 1045.0),\n",
+       " (10, 13.86, 1.35, 2.27, 16.0, 98.0, 2.98, 3.15, 0.22, 1.85, 7.2199, 
1.01, 3.55, 1045.0)]"
+      ]
+     },
+     "execution_count": 64,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS km_arrayin CASCADE;\n",
+    "\n",
+    "CREATE TABLE km_arrayin(pid int, \n",
+    "                        p1 float, \n",
+    "                        p2 float, \n",
+    "                        p3 float,\n",
+    "                        p4 float, \n",
+    "                        p5 float, \n",
+    "                        p6 float,\n",
+    "                        p7 float, \n",
+    "                        p8 float, \n",
+    "                        p9 float,\n",
+    "                        p10 float, \n",
+    "                        p11 float, \n",
+    "                        p12 float,\n",
+    "                        p13 float);\n",
+    "\n",
+    "INSERT INTO km_arrayin VALUES\n",
+    "(1,  14.23, 1.71, 2.43, 15.6, 127, 2.8, 3.0600, 0.2800, 2.29, 5.64, 1.04, 
3.92, 1065),\n",
+    "(2,  13.2, 1.78, 2.14, 11.2, 1, 2.65, 2.76, 0.26, 1.28, 4.38, 1.05, 3.49, 
1050),\n",
+    "(3,  13.16, 2.36,  2.67, 18.6, 101, 2.8,  3.24, 0.3, 2.81, 5.6799, 1.03, 
3.17, 1185),\n",
+    "(4,  14.37, 1.95, 2.5, 16.8, 113, 3.85, 3.49, 0.24, 2.18, 7.8, 0.86, 
3.45, 1480),\n",
+    "(5,  13.24, 2.59, 2.87, 21, 118, 2.8, 2.69, 0.39, 1.82, 4.32, 1.04, 2.93, 
735),\n",
+    "(6,  14.2, 1.76, 2.45, 15.2, 112, 3.27, 3.39, 0.34, 1.97, 6.75, 1.05, 
2.85, 1450),\n",
+    "(7,  14.39, 1.87, 2.45, 14.6, 96, 2.5, 2.52, 0.3, 1.98, 5.25, 1.02, 3.58, 
1290),\n",
+    "(8,  14.06, 2.15, 2.61, 17.6, 121, 2.6, 2.51, 0.31, 1.25, 5.05, 1.06, 
3.58, 1295),\n",
+    "(9,  14.83, 1.64, 2.17, 14, 97, 2.8, 2.98, 0.29, 1.98, 5.2, 1.08, 2.85, 
1045),\n",
+    "(10, 13.86, 1.35, 2.27, 16, 98, 2.98, 3.15, 0.22, 1.8500, 7.2199, 1.01, 
3.55, 1045);\n",
+    "\n",
+    "SELECT * FROM km_arrayin ORDER BY pid;"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "10 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>pid</th>\n",
+       "        <th>p1</th>\n",
+       "        <th>p2</th>\n",
+       "        <th>p3</th>\n",
+       "        <th>p4</th>\n",
+       "        <th>p5</th>\n",
+       "        <th>p6</th>\n",
+       "        <th>p7</th>\n",
+       "        <th>p8</th>\n",
+       "        <th>p9</th>\n",
+       "        <th>p10</th>\n",
+       "        <th>p11</th>\n",
+       "        <th>p12</th>\n",
+       "        <th>p13</th>\n",
+       "        <th>cluster_id</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>14.23</td>\n",
+       "        <td>1.71</td>\n",
+       "        <td>2.43</td>\n",
+       "        <td>15.6</td>\n",
+       "        <td>127.0</td>\n",
+       "        <td>2.8</td>\n",
+       "        <td>3.06</td>\n",
+       "        <td>0.28</td>\n",
+       "        <td>2.29</td>\n",
+       "        <td>5.64</td>\n",
+       "        <td>1.04</td>\n",
+       "        <td>3.92</td>\n",
+       "        <td>1065.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>13.2</td>\n",
+       "        <td>1.78</td>\n",
+       "        <td>2.14</td>\n",
+       "        <td>11.2</td>\n",
+       "        <td>1.0</td>\n",
+       "        <td>2.65</td>\n",
+       "        <td>2.76</td>\n",
+       "        <td>0.26</td>\n",
+       "        <td>1.28</td>\n",
+       "        <td>4.38</td>\n",
+       "        <td>1.05</td>\n",
+       "        <td>3.49</td>\n",
+       "        <td>1050.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>13.16</td>\n",
+       "        <td>2.36</td>\n",
+       "        <td>2.67</td>\n",
+       "        <td>18.6</td>\n",
+       "        <td>101.0</td>\n",
+       "        <td>2.8</td>\n",
+       "        <td>3.24</td>\n",
+       "        <td>0.3</td>\n",
+       "        <td>2.81</td>\n",
+       "        <td>5.6799</td>\n",
+       "        <td>1.03</td>\n",
+       "        <td>3.17</td>\n",
+       "        <td>1185.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>4</td>\n",
+       "        <td>14.37</td>\n",
+       "        <td>1.95</td>\n",
+       "        <td>2.5</td>\n",
+       "        <td>16.8</td>\n",
+       "        <td>113.0</td>\n",
+       "        <td>3.85</td>\n",
+       "        <td>3.49</td>\n",
+       "        <td>0.24</td>\n",
+       "        <td>2.18</td>\n",
+       "        <td>7.8</td>\n",
+       "        <td>0.86</td>\n",
+       "        <td>3.45</td>\n",
+       "        <td>1480.0</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>5</td>\n",
+       "        <td>13.24</td>\n",
+       "        <td>2.59</td>\n",
+       "        <td>2.87</td>\n",
+       "        <td>21.0</td>\n",
+       "        <td>118.0</td>\n",
+       "        <td>2.8</td>\n",
+       "        <td>2.69</td>\n",
+       "        <td>0.39</td>\n",
+       "        <td>1.82</td>\n",
+       "        <td>4.32</td>\n",
+       "        <td>1.04</td>\n",
+       "        <td>2.93</td>\n",
+       "        <td>735.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>6</td>\n",
+       "        <td>14.2</td>\n",
+       "        <td>1.76</td>\n",
+       "        <td>2.45</td>\n",
+       "        <td>15.2</td>\n",
+       "        <td>112.0</td>\n",
+       "        <td>3.27</td>\n",
+       "        <td>3.39</td>\n",
+       "        <td>0.34</td>\n",
+       "        <td>1.97</td>\n",
+       "        <td>6.75</td>\n",
+       "        <td>1.05</td>\n",
+       "        <td>2.85</td>\n",
+       "        <td>1450.0</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>7</td>\n",
+       "        <td>14.39</td>\n",
+       "        <td>1.87</td>\n",
+       "        <td>2.45</td>\n",
+       "        <td>14.6</td>\n",
+       "        <td>96.0</td>\n",
+       "        <td>2.5</td>\n",
+       "        <td>2.52</td>\n",
+       "        <td>0.3</td>\n",
+       "        <td>1.98</td>\n",
+       "        <td>5.25</td>\n",
+       "        <td>1.02</td>\n",
+       "        <td>3.58</td>\n",
+       "        <td>1290.0</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>8</td>\n",
+       "        <td>14.06</td>\n",
+       "        <td>2.15</td>\n",
+       "        <td>2.61</td>\n",
+       "        <td>17.6</td>\n",
+       "        <td>121.0</td>\n",
+       "        <td>2.6</td>\n",
+       "        <td>2.51</td>\n",
+       "        <td>0.31</td>\n",
+       "        <td>1.25</td>\n",
+       "        <td>5.05</td>\n",
+       "        <td>1.06</td>\n",
+       "        <td>3.58</td>\n",
+       "        <td>1295.0</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>9</td>\n",
+       "        <td>14.83</td>\n",
+       "        <td>1.64</td>\n",
+       "        <td>2.17</td>\n",
+       "        <td>14.0</td>\n",
+       "        <td>97.0</td>\n",
+       "        <td>2.8</td>\n",
+       "        <td>2.98</td>\n",
+       "        <td>0.29</td>\n",
+       "        <td>1.98</td>\n",
+       "        <td>5.2</td>\n",
+       "        <td>1.08</td>\n",
+       "        <td>2.85</td>\n",
+       "        <td>1045.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>10</td>\n",
+       "        <td>13.86</td>\n",
+       "        <td>1.35</td>\n",
+       "        <td>2.27</td>\n",
+       "        <td>16.0</td>\n",
+       "        <td>98.0</td>\n",
+       "        <td>2.98</td>\n",
+       "        <td>3.15</td>\n",
+       "        <td>0.22</td>\n",
+       "        <td>1.85</td>\n",
+       "        <td>7.2199</td>\n",
+       "        <td>1.01</td>\n",
+       "        <td>3.55</td>\n",
+       "        <td>1045.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(1, 14.23, 1.71, 2.43, 15.6, 127.0, 2.8, 3.06, 0.28, 2.29, 5.64, 
1.04, 3.92, 1065.0, 1),\n",
+       " (2, 13.2, 1.78, 2.14, 11.2, 1.0, 2.65, 2.76, 0.26, 1.28, 4.38, 1.05, 
3.49, 1050.0, 1),\n",
+       " (3, 13.16, 2.36, 2.67, 18.6, 101.0, 2.8, 3.24, 0.3, 2.81, 5.6799, 
1.03, 3.17, 1185.0, 1),\n",
+       " (4, 14.37, 1.95, 2.5, 16.8, 113.0, 3.85, 3.49, 0.24, 2.18, 7.8, 0.86, 
3.45, 1480.0, 0),\n",
+       " (5, 13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 4.32, 
1.04, 2.93, 735.0, 1),\n",
+       " (6, 14.2, 1.76, 2.45, 15.2, 112.0, 3.27, 3.39, 0.34, 1.97, 6.75, 
1.05, 2.85, 1450.0, 0),\n",
+       " (7, 14.39, 1.87, 2.45, 14.6, 96.0, 2.5, 2.52, 0.3, 1.98, 5.25, 1.02, 
3.58, 1290.0, 0),\n",
+       " (8, 14.06, 2.15, 2.61, 17.6, 121.0, 2.6, 2.51, 0.31, 1.25, 5.05, 
1.06, 3.58, 1295.0, 0),\n",
+       " (9, 14.83, 1.64, 2.17, 14.0, 97.0, 2.8, 2.98, 0.29, 1.98, 5.2, 1.08, 
2.85, 1045.0, 1),\n",
+       " (10, 13.86, 1.35, 2.27, 16.0, 98.0, 2.98, 3.15, 0.22, 1.85, 7.2199, 
1.01, 3.55, 1045.0, 1)]"
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS km_result;\n",
+    "\n",
+    "-- Run kmeans algorithm\n",
+    "CREATE TABLE km_result AS\n",
+    "SELECT * FROM madlib.kmeans_random('km_arrayin', \n",
+    "                                'ARRAY[p1, p2, p3, p4, p5, p6, \n",
+    "                                      p7, p8, p9, p10, p11, p12, p13]', 
\n",
+    "                                2,\n",
+    "                                'madlib.squared_dist_norm2',\n",
+    "                                'madlib.avg', \n",
+    "                                20, \n",
+    "                                0.001);\n",
+    "\n",
+    "-- Get point assignment\n",
+    "SELECT data.*,  (madlib.closest_column(centroids, \n",
+    "                                       ARRAY[p1, p2, p3, p4, p5, p6, \n",
+    "                                      p7, p8, p9, p10, p11, p12, 
p13])).column_id as cluster_id\n",
+    "FROM km_arrayin as data, km_result\n",
+    "ORDER BY data.pid;"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}

Reply via email to