Repository: incubator-madlib-site
Updated Branches:
  refs/heads/asf-site 12422c78a -> de140c36b


update weakly connected cpts workbook with helper functions


Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/commit/de140c36
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/tree/de140c36
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/diff/de140c36

Branch: refs/heads/asf-site
Commit: de140c36b2cd6ad091637d3f5444edecbc0f739d
Parents: 12422c7
Author: Frank McQuillan <fmcquil...@pivotal.io>
Authored: Fri Jul 28 11:54:29 2017 -0700
Committer: Frank McQuillan <fmcquil...@pivotal.io>
Committed: Fri Jul 28 11:54:29 2017 -0700

----------------------------------------------------------------------
 .../Weakly-connected-cpts-v1.ipynb              | 584 ------------
 .../Weakly-connected-cpts-v2.ipynb              | 908 +++++++++++++++++++
 2 files changed, 908 insertions(+), 584 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/de140c36/community-artifacts/Weakly-connected-cpts-v1.ipynb
----------------------------------------------------------------------
diff --git a/community-artifacts/Weakly-connected-cpts-v1.ipynb 
b/community-artifacts/Weakly-connected-cpts-v1.ipynb
deleted file mode 100644
index 813692b..0000000
--- a/community-artifacts/Weakly-connected-cpts-v1.ipynb
+++ /dev/null
@@ -1,584 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Weakly connected components\n",
-    "\n",
-    "Weakly connected components was added in MADlib 1.12."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The sql extension is already loaded. To reload it, use:\n",
-      "  %reload_ext sql\n"
-     ]
-    }
-   ],
-   "source": [
-    "%load_ext sql"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "u'Connected: gpdbchina@madlib'"
-      ]
-     },
-     "execution_count": 26,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Greenplum 4.3.10.0\n",
-    "%sql postgresql://gpdbchina@10.194.10.68:61000/madlib\n",
-    "        \n",
-    "# PostgreSQL local\n",
-    "#%sql postgresql://fmcquillan@localhost:5432/madlib\n",
-    "\n",
-    "# Greenplum 4.2.3.0\n",
-    "#%sql postgresql://gpdbchina@10.194.10.68:55000/madlib"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>version</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>MADlib version: 1.12-dev, git revision: 
rc/v1.9alpha-rc1-188-ge47dc8a, cmake configuration time: Wed Jul  5 18:23:50 
UTC 2017, build type: Release, build system: 
Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: 
g++ 4.4.0</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(u'MADlib version: 1.12-dev, git revision: 
rc/v1.9alpha-rc1-188-ge47dc8a, cmake configuration time: Wed Jul  5 18:23:50 
UTC 2017, build type: Release, build system: 
Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: 
g++ 4.4.0',)]"
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%sql select madlib.version();\n",
-    "#%sql select version();"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 1.  Create vertex and edge tables"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "Done.\n",
-      "Done.\n",
-      "14 rows affected.\n",
-      "18 rows affected.\n",
-      "18 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>src</th>\n",
-       "        <th>dest</th>\n",
-       "        <th>user_id</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>0</td>\n",
-       "        <td>2</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>3</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>3</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>5</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>6</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>6</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>3</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>11</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>12</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>11</td>\n",
-       "        <td>12</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>11</td>\n",
-       "        <td>13</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>12</td>\n",
-       "        <td>13</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>13</td>\n",
-       "        <td>10</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>15</td>\n",
-       "        <td>14</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>15</td>\n",
-       "        <td>16</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(0, 1, 1),\n",
-       " (0, 2, 1),\n",
-       " (1, 2, 1),\n",
-       " (1, 3, 1),\n",
-       " (2, 3, 1),\n",
-       " (2, 5, 1),\n",
-       " (2, 6, 1),\n",
-       " (3, 0, 1),\n",
-       " (5, 6, 1),\n",
-       " (6, 3, 1),\n",
-       " (10, 11, 2),\n",
-       " (10, 12, 2),\n",
-       " (11, 12, 2),\n",
-       " (11, 13, 2),\n",
-       " (12, 13, 2),\n",
-       " (13, 10, 2),\n",
-       " (15, 14, 2),\n",
-       " (15, 16, 2)]"
-      ]
-     },
-     "execution_count": 31,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql \n",
-    "DROP TABLE IF EXISTS vertex, edge;\n",
-    "\n",
-    "CREATE TABLE vertex(\n",
-    "    id INTEGER\n",
-    ");\n",
-    "\n",
-    "CREATE TABLE edge(\n",
-    "    src INTEGER,\n",
-    "    dest INTEGER,\n",
-    "    user_id INTEGER\n",
-    ");\n",
-    "\n",
-    "INSERT INTO vertex VALUES\n",
-    "(0),\n",
-    "(1),\n",
-    "(2),\n",
-    "(3),\n",
-    "(4),\n",
-    "(5),\n",
-    "(6),\n",
-    "(10),\n",
-    "(11),\n",
-    "(12),\n",
-    "(13),\n",
-    "(14),\n",
-    "(15),\n",
-    "(16);\n",
-    "\n",
-    "INSERT INTO edge VALUES\n",
-    "(0, 1, 1),\n",
-    "(0, 2, 1),\n",
-    "(1, 2, 1),\n",
-    "(1, 3, 1),\n",
-    "(2, 3, 1),\n",
-    "(2, 5, 1),\n",
-    "(2, 6, 1),\n",
-    "(3, 0, 1),\n",
-    "(5, 6, 1),\n",
-    "(6, 3, 1),\n",
-    "(10, 11, 2),\n",
-    "(10, 12, 2),\n",
-    "(11, 12, 2),\n",
-    "(11, 13, 2),\n",
-    "(12, 13, 2),\n",
-    "(13, 10, 2),\n",
-    "(15, 16, 2),\n",
-    "(15, 14, 2);\n",
-    "\n",
-    "SELECT * FROM edge ORDER BY src, dest;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 2.  Find all weakly connected components"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "1 rows affected.\n",
-      "14 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>id</th>\n",
-       "        <th>component_id</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>4</td>\n",
-       "        <td>4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>11</td>\n",
-       "        <td>10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>12</td>\n",
-       "        <td>10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>13</td>\n",
-       "        <td>10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>14</td>\n",
-       "        <td>14</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>15</td>\n",
-       "        <td>14</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>16</td>\n",
-       "        <td>14</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(0, 0),\n",
-       " (1, 0),\n",
-       " (2, 0),\n",
-       " (3, 0),\n",
-       " (5, 0),\n",
-       " (6, 0),\n",
-       " (4, 4),\n",
-       " (10, 10),\n",
-       " (11, 10),\n",
-       " (12, 10),\n",
-       " (13, 10),\n",
-       " (14, 14),\n",
-       " (15, 14),\n",
-       " (16, 14)]"
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS wcc_out;\n",
-    "\n",
-    "SELECT madlib.weakly_connected_components(\n",
-    "                         'vertex',             -- Vertex table\n",
-    "                         'id',                 -- Vertix id column\n",
-    "                         'edge',               -- Edge table\n",
-    "                         'src=src, dest=dest', -- Comma delimted string 
of edge arguments\n",
-    "                         'wcc_out');      -- Output table of weakly 
connected components\n",
-    "\n",
-    "SELECT * FROM wcc_out ORDER BY component_id, id;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 3.  Weakly connected components with grouping"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "1 rows affected.\n",
-      "13 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>id</th>\n",
-       "        <th>component_id</th>\n",
-       "        <th>user_id</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>0</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>10</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>11</td>\n",
-       "        <td>10</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>12</td>\n",
-       "        <td>10</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>13</td>\n",
-       "        <td>10</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>14</td>\n",
-       "        <td>14</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>15</td>\n",
-       "        <td>14</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>16</td>\n",
-       "        <td>14</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(0, 0, 1),\n",
-       " (1, 0, 1),\n",
-       " (2, 0, 1),\n",
-       " (3, 0, 1),\n",
-       " (5, 0, 1),\n",
-       " (6, 0, 1),\n",
-       " (10, 10, 2),\n",
-       " (11, 10, 2),\n",
-       " (12, 10, 2),\n",
-       " (13, 10, 2),\n",
-       " (14, 14, 2),\n",
-       " (15, 14, 2),\n",
-       " (16, 14, 2)]"
-      ]
-     },
-     "execution_count": 33,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS wcc_out, wcc_out_summary;\n",
-    "\n",
-    "SELECT madlib.weakly_connected_components(\n",
-    "                         'vertex',             -- Vertex table\n",
-    "                         'id',                 -- Vertix id column\n",
-    "                         'edge',               -- Edge table\n",
-    "                         'src=src, dest=dest', -- Comma delimted string 
of edge arguments\n",
-    "                         'wcc_out',            -- Output table of weakly 
connected components\n",
-    "                         'user_id');           -- Grouping column name\n",
-    "\n",
-    "SELECT * FROM wcc_out ORDER BY component_id, id, user_id;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Note that vertex '4' is not identified as a separate component in the 
above result. This is because disconnected nodes cannot be assigned to a 
particular group with the current graph representation in MADlib."
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/de140c36/community-artifacts/Weakly-connected-cpts-v2.ipynb
----------------------------------------------------------------------
diff --git a/community-artifacts/Weakly-connected-cpts-v2.ipynb 
b/community-artifacts/Weakly-connected-cpts-v2.ipynb
new file mode 100644
index 0000000..3258e44
--- /dev/null
+++ b/community-artifacts/Weakly-connected-cpts-v2.ipynb
@@ -0,0 +1,908 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Weakly connected components\n",
+    "\n",
+    "Weakly connected components was added in MADlib 1.12.  This notebook also 
includes helper functions based on the WCC output."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      
"/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: 
ShimWarning: The `IPython.config` package has been deprecated. You should 
import from traitlets.config instead.\n",
+      "  \"You should import from traitlets.config instead.\", ShimWarning)\n",
+      
"/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5:
 UserWarning: IPython.utils.traitlets has moved to a top-level traitlets 
package.\n",
+      "  warn(\"IPython.utils.traitlets has moved to a top-level traitlets 
package.\")\n"
+     ]
+    }
+   ],
+   "source": [
+    "%load_ext sql"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "u'Connected: gpdbchina@madlib'"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Greenplum 4.3.10.0\n",
+    "%sql postgresql://gpdbchina@10.194.10.68:61000/madlib\n",
+    "        \n",
+    "# PostgreSQL local\n",
+    "#%sql postgresql://fmcquillan@localhost:5432/madlib\n",
+    "\n",
+    "# Greenplum 4.2.3.0\n",
+    "#%sql postgresql://gpdbchina@10.194.10.68:55000/madlib"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>version</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>MADlib version: 1.12-dev, git revision: 
rc/v1.9alpha-rc1-195-g85e89ef, cmake configuration time: Wed Jul 26 23:21:17 
UTC 2017, build type: Release, build system: 
Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: 
g++ 4.4.0</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(u'MADlib version: 1.12-dev, git revision: 
rc/v1.9alpha-rc1-195-g85e89ef, cmake configuration time: Wed Jul 26 23:21:17 
UTC 2017, build type: Release, build system: 
Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: 
g++ 4.4.0',)]"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%sql select madlib.version();\n",
+    "#%sql select version();"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 1.  Create vertex and edge tables"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 103,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "Done.\n",
+      "Done.\n",
+      "14 rows affected.\n",
+      "18 rows affected.\n",
+      "18 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>src</th>\n",
+       "        <th>dest</th>\n",
+       "        <th>user_id</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0</td>\n",
+       "        <td>1</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0</td>\n",
+       "        <td>2</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>2</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>3</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>3</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>5</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>6</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>5</td>\n",
+       "        <td>6</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>6</td>\n",
+       "        <td>3</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>10</td>\n",
+       "        <td>11</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>10</td>\n",
+       "        <td>12</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>11</td>\n",
+       "        <td>12</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>11</td>\n",
+       "        <td>13</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>12</td>\n",
+       "        <td>13</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>13</td>\n",
+       "        <td>10</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>15</td>\n",
+       "        <td>14</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>15</td>\n",
+       "        <td>16</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(0, 1, 1),\n",
+       " (0, 2, 1),\n",
+       " (1, 2, 1),\n",
+       " (1, 3, 1),\n",
+       " (2, 3, 1),\n",
+       " (2, 5, 1),\n",
+       " (2, 6, 1),\n",
+       " (3, 0, 1),\n",
+       " (5, 6, 1),\n",
+       " (6, 3, 1),\n",
+       " (10, 11, 2),\n",
+       " (10, 12, 2),\n",
+       " (11, 12, 2),\n",
+       " (11, 13, 2),\n",
+       " (12, 13, 2),\n",
+       " (13, 10, 2),\n",
+       " (15, 14, 2),\n",
+       " (15, 16, 2)]"
+      ]
+     },
+     "execution_count": 103,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql \n",
+    "DROP TABLE IF EXISTS vertex, edge;\n",
+    "\n",
+    "CREATE TABLE vertex(\n",
+    "    id INTEGER\n",
+    ");\n",
+    "\n",
+    "CREATE TABLE edge(\n",
+    "    src INTEGER,\n",
+    "    dest INTEGER,\n",
+    "    user_id INTEGER\n",
+    ");\n",
+    "\n",
+    "INSERT INTO vertex VALUES\n",
+    "(0),\n",
+    "(1),\n",
+    "(2),\n",
+    "(3),\n",
+    "(4),\n",
+    "(5),\n",
+    "(6),\n",
+    "(10),\n",
+    "(11),\n",
+    "(12),\n",
+    "(13),\n",
+    "(14),\n",
+    "(15),\n",
+    "(16);\n",
+    "\n",
+    "INSERT INTO edge VALUES\n",
+    "(0, 1, 1),\n",
+    "(0, 2, 1),\n",
+    "(1, 2, 1),\n",
+    "(1, 3, 1),\n",
+    "(2, 3, 1),\n",
+    "(2, 5, 1),\n",
+    "(2, 6, 1),\n",
+    "(3, 0, 1),\n",
+    "(5, 6, 1),\n",
+    "(6, 3, 1),\n",
+    "(10, 11, 2),\n",
+    "(10, 12, 2),\n",
+    "(11, 12, 2),\n",
+    "(11, 13, 2),\n",
+    "(12, 13, 2),\n",
+    "(13, 10, 2),\n",
+    "(15, 16, 2),\n",
+    "(15, 14, 2);\n",
+    "\n",
+    "SELECT * FROM edge ORDER BY src, dest;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 2.  Find all weakly connected components"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 104,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "14 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>id</th>\n",
+       "        <th>component_id</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>5</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>6</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>4</td>\n",
+       "        <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>10</td>\n",
+       "        <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>11</td>\n",
+       "        <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>12</td>\n",
+       "        <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>13</td>\n",
+       "        <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>14</td>\n",
+       "        <td>14</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>15</td>\n",
+       "        <td>14</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>16</td>\n",
+       "        <td>14</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(0, 0),\n",
+       " (1, 0),\n",
+       " (2, 0),\n",
+       " (3, 0),\n",
+       " (5, 0),\n",
+       " (6, 0),\n",
+       " (4, 4),\n",
+       " (10, 10),\n",
+       " (11, 10),\n",
+       " (12, 10),\n",
+       " (13, 10),\n",
+       " (14, 14),\n",
+       " (15, 14),\n",
+       " (16, 14)]"
+      ]
+     },
+     "execution_count": 104,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS wcc_out, wcc_out_summary;\n",
+    "\n",
+    "SELECT madlib.weakly_connected_components(\n",
+    "                         'vertex',             -- Vertex table\n",
+    "                         'id',                 -- Vertix id column\n",
+    "                         'edge',               -- Edge table\n",
+    "                         'src=src, dest=dest', -- Comma delimted string 
of edge arguments\n",
+    "                         'wcc_out');           -- Output table of weakly 
connected components\n",
+    "\n",
+    "SELECT * FROM wcc_out ORDER BY component_id, id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 3.  Weakly connected components with grouping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 105,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "13 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>id</th>\n",
+       "        <th>component_id</th>\n",
+       "        <th>user_id</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0</td>\n",
+       "        <td>0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>5</td>\n",
+       "        <td>0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>6</td>\n",
+       "        <td>0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>10</td>\n",
+       "        <td>10</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>11</td>\n",
+       "        <td>10</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>12</td>\n",
+       "        <td>10</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>13</td>\n",
+       "        <td>10</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>14</td>\n",
+       "        <td>14</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>15</td>\n",
+       "        <td>14</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>16</td>\n",
+       "        <td>14</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(0, 0, 1),\n",
+       " (1, 0, 1),\n",
+       " (2, 0, 1),\n",
+       " (3, 0, 1),\n",
+       " (5, 0, 1),\n",
+       " (6, 0, 1),\n",
+       " (10, 10, 2),\n",
+       " (11, 10, 2),\n",
+       " (12, 10, 2),\n",
+       " (13, 10, 2),\n",
+       " (14, 14, 2),\n",
+       " (15, 14, 2),\n",
+       " (16, 14, 2)]"
+      ]
+     },
+     "execution_count": 105,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS wcc_out, wcc_out_summary;\n",
+    "\n",
+    "SELECT madlib.weakly_connected_components(\n",
+    "                         'vertex',             -- Vertex table\n",
+    "                         'id',                 -- Vertix id column\n",
+    "                         'edge',               -- Edge table\n",
+    "                         'src=src, dest=dest', -- Comma delimted string 
of edge arguments\n",
+    "                         'wcc_out',            -- Output table of weakly 
connected components\n",
+    "                         'user_id');           -- Grouping column name\n",
+    "\n",
+    "SELECT * FROM wcc_out ORDER BY component_id, id, user_id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that vertex '4' is not identified as a separate component in the 
above result. This is because disconnected nodes cannot be assigned to a 
particular group with the current graph representation in MADlib."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 4. Largest connected component"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 106,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "2 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>user_id</th>\n",
+       "        <th>component_id</th>\n",
+       "        <th>num_vertices</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>0</td>\n",
+       "        <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>10</td>\n",
+       "        <td>4</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(1, 0, 6L), (2, 10, 4L)]"
+      ]
+     },
+     "execution_count": 106,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql \n",
+    "DROP TABLE IF EXISTS largest_cpt_table;\n",
+    "\n",
+    "SELECT madlib.graph_wcc_largest_cpt(\n",
+    "                         'wcc_out',             -- WCC output table\n",
+    "                         'largest_cpt_table');  -- output table 
containing largest component ID\n",
+    "\n",
+    "SELECT * FROM largest_cpt_table ORDER BY component_id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 5. Histogram of number of vertices per connected component"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 107,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "3 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>user_id</th>\n",
+       "        <th>component_id</th>\n",
+       "        <th>num_vertices</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>0</td>\n",
+       "        <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>10</td>\n",
+       "        <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>14</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(1, 0, 6L), (2, 10, 4L), (2, 14, 3L)]"
+      ]
+     },
+     "execution_count": 107,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS histogram_table; \n",
+    "\n",
+    "SELECT madlib.graph_wcc_histogram(\n",
+    "                         'wcc_out',           -- WCC's output table\n",
+    "                         'histogram_table');  -- output table containing 
the histogram of vertices\n",
+    "\n",
+    "SELECT * FROM histogram_table ORDER BY component_id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 6. Check if two vertices belong to the same component"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 108,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>user_id</th>\n",
+       "        <th>component_id</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>14</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(2, 14)]"
+      ]
+     },
+     "execution_count": 108,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS vc_table;\n",
+    "\n",
+    "SELECT madlib.graph_wcc_vertex_check(\n",
+    "                         'wcc_out',    -- WCC output table\n",
+    "                         '14,15',      -- Pair of vertex IDs\n",
+    "                         'vc_table');  -- output table containing 
components that contain the two vertices\n",
+    "\n",
+    "SELECT * FROM vc_table ORDER BY component_id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 7. Find all reachable vertices from a source vertex"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 109,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "5 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>user_id</th>\n",
+       "        <th>component_id</th>\n",
+       "        <th>dest</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>0</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>0</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>0</td>\n",
+       "        <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>0</td>\n",
+       "        <td>6</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(1, 0, 1), (1, 0, 2), (1, 0, 3), (1, 0, 5), (1, 0, 6)]"
+      ]
+     },
+     "execution_count": 109,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS reach_table;\n",
+    "\n",
+    "SELECT madlib.graph_wcc_reachable_vertices(\n",
+    "                         'wcc_out',         -- WCC output table\n",
+    "                         '0',               -- source vertex\n",
+    "                         'reach_table');    -- output table containing 
all vertices reachable from source vertex\n",
+    "\n",
+    "SELECT * FROM reach_table ORDER BY component_id, dest;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 8. Count of connected components"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 110,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "2 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>user_id</th>\n",
+       "        <th>num_components</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(1, 1L), (2, 2L)]"
+      ]
+     },
+     "execution_count": 110,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS count_table;\n",
+    "\n",
+    "SELECT madlib.graph_wcc_num_cpts(\n",
+    "                         'wcc_out',       -- WCC output table\n",
+    "                         'count_table');  -- output table containing 
number of components per group\n",
+    "\n",
+    "SELECT * FROM count_table;"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}


Reply via email to