Repository: incubator-madlib-site
Updated Branches:
  refs/heads/asf-site 53c36d3d3 -> 12422c78a


add bfs workbook


Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/commit/12422c78
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/tree/12422c78
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/diff/12422c78

Branch: refs/heads/asf-site
Commit: 12422c78abb25ee4e0651798f53dbe9eaba25bc0
Parents: 53c36d3
Author: Frank McQuillan <fmcquil...@pivotal.io>
Authored: Fri Jul 14 12:13:33 2017 -0700
Committer: Frank McQuillan <fmcquil...@pivotal.io>
Committed: Fri Jul 14 12:13:33 2017 -0700

----------------------------------------------------------------------
 .../Breadth-first-search-v1.ipynb               | 1187 ++++++++++++++++++
 1 file changed, 1187 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/12422c78/community-artifacts/Breadth-first-search-v1.ipynb
----------------------------------------------------------------------
diff --git a/community-artifacts/Breadth-first-search-v1.ipynb 
b/community-artifacts/Breadth-first-search-v1.ipynb
new file mode 100644
index 0000000..df6dd97
--- /dev/null
+++ b/community-artifacts/Breadth-first-search-v1.ipynb
@@ -0,0 +1,1187 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Breadth first search\n",
+    "\n",
+    "Breadth first search was added in MADlib 1.12."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The sql extension is already loaded. To reload it, use:\n",
+      "  %reload_ext sql\n"
+     ]
+    }
+   ],
+   "source": [
+    "%load_ext sql"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "u'Connected: gpdbchina@madlib'"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Greenplum 4.3.10.0\n",
+    "%sql postgresql://gpdbchina@10.194.10.68:61000/madlib\n",
+    "        \n",
+    "# PostgreSQL local\n",
+    "#%sql postgresql://fmcquillan@localhost:5432/madlib\n",
+    "\n",
+    "# Greenplum 4.2.3.0\n",
+    "#%sql postgresql://gpdbchina@10.194.10.68:55000/madlib"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>version</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>MADlib version: 1.12-dev, git revision: 
rel/v1.11-29-g8c9b955, cmake configuration time: Thu Jul 13 00:17:54 UTC 2017, 
build type: Release, build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C 
compiler: gcc 4.4.0, C++ compiler: g++ 4.4.0</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(u'MADlib version: 1.12-dev, git revision: rel/v1.11-29-g8c9b955, 
cmake configuration time: Thu Jul 13 00:17:54 UTC 2017, build type: Release, 
build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, 
C++ compiler: g++ 4.4.0',)]"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%sql select madlib.version();\n",
+    "#%sql select version();"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 1.  Create vertex and edge tables"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "Done.\n",
+      "Done.\n",
+      "12 rows affected.\n",
+      "11 rows affected.\n",
+      "11 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>src</th>\n",
+       "        <th>dest</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0</td>\n",
+       "        <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>4</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>8</td>\n",
+       "        <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>9</td>\n",
+       "        <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>9</td>\n",
+       "        <td>11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>10</td>\n",
+       "        <td>8</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(0, 5),\n",
+       " (1, 0),\n",
+       " (1, 3),\n",
+       " (2, 6),\n",
+       " (3, 4),\n",
+       " (3, 5),\n",
+       " (4, 2),\n",
+       " (8, 9),\n",
+       " (9, 10),\n",
+       " (9, 11),\n",
+       " (10, 8)]"
+      ]
+     },
+     "execution_count": 85,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql \n",
+    "DROP TABLE IF EXISTS vertex, edge;\n",
+    "\n",
+    "CREATE TABLE vertex(\n",
+    "        id INTEGER\n",
+    "        );\n",
+    "\n",
+    "CREATE TABLE edge(\n",
+    "        src INTEGER,\n",
+    "        dest INTEGER\n",
+    "        );\n",
+    "\n",
+    "INSERT INTO vertex VALUES\n",
+    "(0),\n",
+    "(1),\n",
+    "(2),\n",
+    "(3),\n",
+    "(4),\n",
+    "(5),\n",
+    "(6),\n",
+    "(7),\n",
+    "(8),\n",
+    "(9),\n",
+    "(10),\n",
+    "(11);\n",
+    "\n",
+    "INSERT INTO edge VALUES\n",
+    "(0, 5),\n",
+    "(1, 0),\n",
+    "(1, 3),\n",
+    "(2, 6),\n",
+    "(3, 4),\n",
+    "(3, 5),\n",
+    "(4, 2),\n",
+    "(8, 9),\n",
+    "(9, 10),\n",
+    "(9, 11),\n",
+    "(10, 8);\n",
+    "\n",
+    "SELECT * FROM edge ORDER BY src, dest;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 2.  Traverse undirected graph from vertex 3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "7 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>id</th>\n",
+       "        <th>dist</th>\n",
+       "        <th>parent</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>0</td>\n",
+       "        <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>1</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>4</td>\n",
+       "        <td>1</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>5</td>\n",
+       "        <td>1</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0</td>\n",
+       "        <td>2</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>2</td>\n",
+       "        <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>6</td>\n",
+       "        <td>3</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(3, 0, None),\n",
+       " (1, 1, 3),\n",
+       " (4, 1, 3),\n",
+       " (5, 1, 3),\n",
+       " (0, 2, 1),\n",
+       " (2, 2, 4),\n",
+       " (6, 3, 2)]"
+      ]
+     },
+     "execution_count": 86,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS out, out_summary;\n",
+    "\n",
+    "SELECT madlib.graph_bfs(\n",
+    "                         'vertex',      -- Vertex table\n",
+    "                         NULL,          -- Vertix id column (NULL means 
use default naming)\n",
+    "                         'edge',        -- Edge table\n",
+    "                         NULL,          -- Edge arguments (NULL means use 
default naming)\n",
+    "                         3,             -- Source vertex for BFS\n",
+    "                         'out');        -- Output table of nodes 
reachable from source_vertex\n",
+    "                                        -- Default values used for the 
other arguments\n",
+    "\n",
+    "SELECT * FROM out ORDER BY dist,id;"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 88,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>vertex_table</th>\n",
+       "        <th>vertex_id</th>\n",
+       "        <th>edge_table</th>\n",
+       "        <th>edge_args</th>\n",
+       "        <th>source_vertex</th>\n",
+       "        <th>out_table</th>\n",
+       "        <th>max_distance</th>\n",
+       "        <th>directed</th>\n",
+       "        <th>grouping_cols</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>vertex</td>\n",
+       "        <td>NULL</td>\n",
+       "        <td>edge</td>\n",
+       "        <td>NULL</td>\n",
+       "        <td>3</td>\n",
+       "        <td>out</td>\n",
+       "        <td>None</td>\n",
+       "        <td>None</td>\n",
+       "        <td>NULL</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(u'vertex', u'NULL', u'edge', u'NULL', 3, u'out', None, None, 
u'NULL')]"
+      ]
+     },
+     "execution_count": 88,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "SELECT * FROM out_summary;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 3.  Use max_distance to limit the search distance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 87,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "6 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>id</th>\n",
+       "        <th>dist</th>\n",
+       "        <th>parent</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>0</td>\n",
+       "        <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>1</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>4</td>\n",
+       "        <td>1</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>5</td>\n",
+       "        <td>1</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0</td>\n",
+       "        <td>2</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>2</td>\n",
+       "        <td>4</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(3, 0, None), (1, 1, 3), (4, 1, 3), (5, 1, 3), (0, 2, 1), (2, 2, 4)]"
+      ]
+     },
+     "execution_count": 87,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS out_max, out_max_summary;\n",
+    "\n",
+    "SELECT madlib.graph_bfs(\n",
+    "                         'vertex',      -- Vertex table\n",
+    "                         NULL,          -- Vertix id column (NULL means 
use default naming)\n",
+    "                         'edge',        -- Edge table\n",
+    "                         NULL,          -- Edge arguments (NULL means use 
default naming)\n",
+    "                         3,             -- Source vertex for BFS\n",
+    "                         'out_max',     -- Output table of nodes 
reachable from source_vertex\n",
+    "                         2);            -- Maximum distance to traverse 
from source_vertex        \n",
+    "                                        -- Default values used for the 
other arguments\n",
+    "    \n",
+    "SELECT * FROM out_max ORDER BY dist,id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 4.  Use different column naming\n",
+    "\n",
+    "Now let's do an example using different column names in the tables (i.e., 
not the defaults). Create the vertex and edge tables:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 89,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "12 rows affected.\n",
+      "11 rows affected.\n",
+      "11 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>n1</th>\n",
+       "        <th>n2</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0</td>\n",
+       "        <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>4</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>8</td>\n",
+       "        <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>9</td>\n",
+       "        <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>9</td>\n",
+       "        <td>11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>10</td>\n",
+       "        <td>8</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(0, 5),\n",
+       " (1, 0),\n",
+       " (1, 3),\n",
+       " (2, 6),\n",
+       " (3, 4),\n",
+       " (3, 5),\n",
+       " (4, 2),\n",
+       " (8, 9),\n",
+       " (9, 10),\n",
+       " (9, 11),\n",
+       " (10, 8)]"
+      ]
+     },
+     "execution_count": 89,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS vertex_alt, edge_alt;\n",
+    "CREATE TABLE vertex_alt AS SELECT id AS v_id FROM vertex;\n",
+    "CREATE TABLE edge_alt AS SELECT src AS n1, dest AS n2 FROM edge;\n",
+    "SELECT * FROM edge_alt ORDER BY n1, n2;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 5. Run BFS from vertex 8"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 90,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "4 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>v_id</th>\n",
+       "        <th>dist</th>\n",
+       "        <th>parent</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>8</td>\n",
+       "        <td>0</td>\n",
+       "        <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>9</td>\n",
+       "        <td>1</td>\n",
+       "        <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>10</td>\n",
+       "        <td>1</td>\n",
+       "        <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>11</td>\n",
+       "        <td>2</td>\n",
+       "        <td>9</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(8, 0, None), (9, 1, 8), (10, 1, 8), (11, 2, 9)]"
+      ]
+     },
+     "execution_count": 90,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS out_alt, out_alt_summary;\n",
+    "\n",
+    "SELECT madlib.graph_bfs(\n",
+    "                         'vertex_alt',                  -- Vertex 
table\n",
+    "                         'v_id',                        -- Vertex id 
column (NULL means use default naming)\n",
+    "                         'edge_alt',                    -- Edge table\n",
+    "                         'src=n1, dest=n2',             -- Edge arguments 
(NULL means use default naming)\n",
+    "                         8,                             -- Source vertex 
for BFS\n",
+    "                         'out_alt');                    -- Output table 
of nodes reachable from source_vertex\n",
+    "\n",
+    "SELECT * FROM out_alt ORDER BY v_id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 6. Directed graph\n",
+    "\n",
+    "Now we show an example where the graph is treated as a directed graph."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "4 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>v_id</th>\n",
+       "        <th>dist</th>\n",
+       "        <th>parent</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>8</td>\n",
+       "        <td>0</td>\n",
+       "        <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>9</td>\n",
+       "        <td>1</td>\n",
+       "        <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>10</td>\n",
+       "        <td>2</td>\n",
+       "        <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>11</td>\n",
+       "        <td>2</td>\n",
+       "        <td>9</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(8, 0, None), (9, 1, 8), (10, 2, 9), (11, 2, 9)]"
+      ]
+     },
+     "execution_count": 91,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS out_alt_dir, out_alt_dir_summary;\n",
+    "\n",
+    "SELECT madlib.graph_bfs(\n",
+    "                         'vertex_alt',                  -- Vertex 
table\n",
+    "                         'v_id',                        -- Vertex id 
column (NULL means use default naming)\n",
+    "                         'edge_alt',                    -- Edge table\n",
+    "                         'src=n1, dest=n2',             -- Edge arguments 
(NULL means use default naming)\n",
+    "                         8,                             -- Source vertex 
for BFS\n",
+    "                         'out_alt_dir',                 -- Output table 
of nodes reachable from source_vertex\n",
+    "                         NULL,                          -- Maximum 
distance to traverse from source_vertex\n",
+    "                         TRUE);                         -- Flag for 
specifying directed graph\n",
+    "\n",
+    "SELECT * FROM out_alt_dir ORDER BY v_id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Notice that, with the graph being treated as directed, the parent of 
v_id=10 is now vertex 9 and not 8 as in the undirected case."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 7. Grouping\n",
+    "\n",
+    "Create a graph with 2 groups."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 92,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "Done.\n",
+      "15 rows affected.\n",
+      "15 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>g1</th>\n",
+       "        <th>g2</th>\n",
+       "        <th>src</th>\n",
+       "        <th>dest</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>1</td>\n",
+       "        <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>1</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>2</td>\n",
+       "        <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>3</td>\n",
+       "        <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>3</td>\n",
+       "        <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>4</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>8</td>\n",
+       "        <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>9</td>\n",
+       "        <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>9</td>\n",
+       "        <td>11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>10</td>\n",
+       "        <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>0</td>\n",
+       "        <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>202</td>\n",
+       "        <td>c</td>\n",
+       "        <td>9</td>\n",
+       "        <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>202</td>\n",
+       "        <td>c</td>\n",
+       "        <td>9</td>\n",
+       "        <td>11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>202</td>\n",
+       "        <td>c</td>\n",
+       "        <td>10</td>\n",
+       "        <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>202</td>\n",
+       "        <td>c</td>\n",
+       "        <td>8</td>\n",
+       "        <td>9</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(100, u'a', 1, 0),\n",
+       " (100, u'a', 1, 3),\n",
+       " (100, u'a', 2, 6),\n",
+       " (100, u'a', 3, 4),\n",
+       " (100, u'a', 3, 5),\n",
+       " (100, u'a', 4, 2),\n",
+       " (100, u'a', 8, 9),\n",
+       " (100, u'a', 9, 10),\n",
+       " (100, u'a', 9, 11),\n",
+       " (100, u'a', 10, 8),\n",
+       " (100, u'a', 0, 5),\n",
+       " (202, u'c', 9, 10),\n",
+       " (202, u'c', 9, 11),\n",
+       " (202, u'c', 10, 8),\n",
+       " (202, u'c', 8, 9)]"
+      ]
+     },
+     "execution_count": 92,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS edge_gr;\n",
+    "\n",
+    "CREATE TABLE edge_gr(\n",
+    "                  g1 INTEGER,\n",
+    "                  g2 TEXT,\n",
+    "                  src INTEGER,\n",
+    "                  dest INTEGER\n",
+    "                );\n",
+    "\n",
+    "INSERT INTO edge_gr VALUES\n",
+    "(100, 'a', 0, 5),\n",
+    "(100, 'a', 1, 0),\n",
+    "(100, 'a', 1, 3),\n",
+    "(100, 'a', 2, 6),\n",
+    "(100, 'a', 3, 4),\n",
+    "(100, 'a', 3, 5),\n",
+    "(100, 'a', 4, 2),\n",
+    "(100, 'a', 8, 9),\n",
+    "(100, 'a', 9, 10),\n",
+    "(100, 'a', 9, 11),\n",
+    "(100, 'a', 10, 8),\n",
+    "(202, 'c', 8, 9),\n",
+    "(202, 'c', 9, 10),\n",
+    "(202, 'c', 9, 11),\n",
+    "(202, 'c', 10, 8);\n",
+    "\n",
+    "SELECT * FROM edge_gr ORDER BY g1, g2;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 8. Run BFS for all groups"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 93,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "8 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>g1</th>\n",
+       "        <th>g2</th>\n",
+       "        <th>id</th>\n",
+       "        <th>dist</th>\n",
+       "        <th>parent</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>8</td>\n",
+       "        <td>0</td>\n",
+       "        <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>9</td>\n",
+       "        <td>1</td>\n",
+       "        <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>10</td>\n",
+       "        <td>1</td>\n",
+       "        <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>11</td>\n",
+       "        <td>2</td>\n",
+       "        <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>202</td>\n",
+       "        <td>c</td>\n",
+       "        <td>8</td>\n",
+       "        <td>0</td>\n",
+       "        <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>202</td>\n",
+       "        <td>c</td>\n",
+       "        <td>9</td>\n",
+       "        <td>1</td>\n",
+       "        <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>202</td>\n",
+       "        <td>c</td>\n",
+       "        <td>10</td>\n",
+       "        <td>1</td>\n",
+       "        <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>202</td>\n",
+       "        <td>c</td>\n",
+       "        <td>11</td>\n",
+       "        <td>2</td>\n",
+       "        <td>9</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(100, u'a', 8, 0, None),\n",
+       " (100, u'a', 9, 1, 8),\n",
+       " (100, u'a', 10, 1, 8),\n",
+       " (100, u'a', 11, 2, 9),\n",
+       " (202, u'c', 8, 0, None),\n",
+       " (202, u'c', 9, 1, 8),\n",
+       " (202, u'c', 10, 1, 8),\n",
+       " (202, u'c', 11, 2, 9)]"
+      ]
+     },
+     "execution_count": 93,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS out_gr, out_gr_summary;\n",
+    "\n",
+    "SELECT madlib.graph_bfs(\n",
+    "                         'vertex',      -- Vertex table\n",
+    "                         NULL,          -- Vertex id column (NULL means 
use default naming)\n",
+    "                         'edge_gr',     -- Edge table\n",
+    "                         NULL,          -- Edge arguments (NULL means use 
default naming)\n",
+    "                         8,             -- Source vertex for BFS\n",
+    "                         'out_gr',      -- Output table of nodes 
reachable from source_vertex\n",
+    "                         NULL,          -- Maximum distance to traverse 
from source_vertex\n",
+    "                         NULL,          -- Flag for specifying directed 
graph\n",
+    "                         'g1,g2'        -- Grouping columns\n",
+    ");\n",
+    "\n",
+    "SELECT * FROM out_gr ORDER BY g1,g2,dist,id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If source_vertex is not present in a group, then that group will not 
appear in the output table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 94,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "7 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>g1</th>\n",
+       "        <th>g2</th>\n",
+       "        <th>id</th>\n",
+       "        <th>dist</th>\n",
+       "        <th>parent</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>3</td>\n",
+       "        <td>0</td>\n",
+       "        <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>1</td>\n",
+       "        <td>1</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>4</td>\n",
+       "        <td>1</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>5</td>\n",
+       "        <td>1</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>0</td>\n",
+       "        <td>2</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>2</td>\n",
+       "        <td>2</td>\n",
+       "        <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>a</td>\n",
+       "        <td>6</td>\n",
+       "        <td>3</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(100, u'a', 3, 0, None),\n",
+       " (100, u'a', 1, 1, 3),\n",
+       " (100, u'a', 4, 1, 3),\n",
+       " (100, u'a', 5, 1, 3),\n",
+       " (100, u'a', 0, 2, 1),\n",
+       " (100, u'a', 2, 2, 4),\n",
+       " (100, u'a', 6, 3, 2)]"
+      ]
+     },
+     "execution_count": 94,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS out_gr, out_gr_summary;\n",
+    "\n",
+    "SELECT madlib.graph_bfs(\n",
+    "                         'vertex',      -- Vertex table\n",
+    "                         NULL,          -- Vertex id column (NULL means 
use default naming)\n",
+    "                         'edge_gr',     -- Edge table\n",
+    "                         NULL,          -- Edge arguments (NULL means use 
default naming)\n",
+    "                         3,             -- Source vertex for BFS\n",
+    "                         'out_gr',      -- Output table of nodes 
reachable from source_vertex\n",
+    "                         NULL,          -- Maximum distance to traverse 
from source_vertex\n",
+    "                         NULL,          -- Flag for specifying directed 
graph\n",
+    "                         'g1,g2'        -- Grouping columns\n",
+    ");\n",
+    "\n",
+    "SELECT * FROM out_gr ORDER BY g1,g2,dist,id;"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}

Reply via email to