Repository: incubator-madlib-site Updated Branches: refs/heads/asf-site 53c36d3d3 -> 12422c78a
add bfs workbook Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/commit/12422c78 Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/tree/12422c78 Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/diff/12422c78 Branch: refs/heads/asf-site Commit: 12422c78abb25ee4e0651798f53dbe9eaba25bc0 Parents: 53c36d3 Author: Frank McQuillan <fmcquil...@pivotal.io> Authored: Fri Jul 14 12:13:33 2017 -0700 Committer: Frank McQuillan <fmcquil...@pivotal.io> Committed: Fri Jul 14 12:13:33 2017 -0700 ---------------------------------------------------------------------- .../Breadth-first-search-v1.ipynb | 1187 ++++++++++++++++++ 1 file changed, 1187 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/12422c78/community-artifacts/Breadth-first-search-v1.ipynb ---------------------------------------------------------------------- diff --git a/community-artifacts/Breadth-first-search-v1.ipynb b/community-artifacts/Breadth-first-search-v1.ipynb new file mode 100644 index 0000000..df6dd97 --- /dev/null +++ b/community-artifacts/Breadth-first-search-v1.ipynb @@ -0,0 +1,1187 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Breadth first search\n", + "\n", + "Breadth first search was added in MADlib 1.12." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The sql extension is already loaded. To reload it, use:\n", + " %reload_ext sql\n" + ] + } + ], + "source": [ + "%load_ext sql" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "u'Connected: gpdbchina@madlib'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Greenplum 4.3.10.0\n", + "%sql postgresql://gpdbchina@10.194.10.68:61000/madlib\n", + " \n", + "# PostgreSQL local\n", + "#%sql postgresql://fmcquillan@localhost:5432/madlib\n", + "\n", + "# Greenplum 4.2.3.0\n", + "#%sql postgresql://gpdbchina@10.194.10.68:55000/madlib" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>version</th>\n", + " </tr>\n", + " <tr>\n", + " <td>MADlib version: 1.12-dev, git revision: rel/v1.11-29-g8c9b955, cmake configuration time: Thu Jul 13 00:17:54 UTC 2017, build type: Release, build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: g++ 4.4.0</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(u'MADlib version: 1.12-dev, git revision: rel/v1.11-29-g8c9b955, cmake configuration time: Thu Jul 13 00:17:54 UTC 2017, build type: Release, build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: g++ 4.4.0',)]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%sql select madlib.version();\n", + "#%sql select version();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1. Create vertex and edge tables" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "Done.\n", + "Done.\n", + "12 rows affected.\n", + "11 rows affected.\n", + "11 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>src</th>\n", + " <th>dest</th>\n", + " </tr>\n", + " <tr>\n", + " <td>0</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <td>2</td>\n", + " <td>6</td>\n", + " </tr>\n", + " <tr>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <td>3</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <td>4</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>8</td>\n", + " <td>9</td>\n", + " </tr>\n", + " <tr>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <td>9</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <td>10</td>\n", + " <td>8</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(0, 5),\n", + " (1, 0),\n", + " (1, 3),\n", + " (2, 6),\n", + " (3, 4),\n", + " (3, 5),\n", + " (4, 2),\n", + " (8, 9),\n", + " (9, 10),\n", + " (9, 11),\n", + " (10, 8)]" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql \n", + "DROP TABLE IF EXISTS vertex, edge;\n", + "\n", + "CREATE TABLE vertex(\n", + " id INTEGER\n", + " );\n", + "\n", + "CREATE TABLE edge(\n", + " src INTEGER,\n", + " dest INTEGER\n", + " );\n", + "\n", + "INSERT INTO vertex VALUES\n", + "(0),\n", + "(1),\n", + "(2),\n", + "(3),\n", + "(4),\n", + "(5),\n", + "(6),\n", + "(7),\n", + "(8),\n", + "(9),\n", + "(10),\n", + "(11);\n", + "\n", + "INSERT INTO edge VALUES\n", + "(0, 5),\n", + "(1, 0),\n", + "(1, 3),\n", + "(2, 6),\n", + "(3, 4),\n", + "(3, 5),\n", + "(4, 2),\n", + "(8, 9),\n", + "(9, 10),\n", + "(9, 11),\n", + "(10, 8);\n", + "\n", + "SELECT * FROM edge ORDER BY src, dest;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2. Traverse undirected graph from vertex 3" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "1 rows affected.\n", + "7 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>id</th>\n", + " <th>dist</th>\n", + " <th>parent</th>\n", + " </tr>\n", + " <tr>\n", + " <td>3</td>\n", + " <td>0</td>\n", + " <td>None</td>\n", + " </tr>\n", + " <tr>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <td>5</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <td>6</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(3, 0, None),\n", + " (1, 1, 3),\n", + " (4, 1, 3),\n", + " (5, 1, 3),\n", + " (0, 2, 1),\n", + " (2, 2, 4),\n", + " (6, 3, 2)]" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql\n", + "DROP TABLE IF EXISTS out, out_summary;\n", + "\n", + "SELECT madlib.graph_bfs(\n", + " 'vertex', -- Vertex table\n", + " NULL, -- Vertix id column (NULL means use default naming)\n", + " 'edge', -- Edge table\n", + " NULL, -- Edge arguments (NULL means use default naming)\n", + " 3, -- Source vertex for BFS\n", + " 'out'); -- Output table of nodes reachable from source_vertex\n", + " -- Default values used for the other arguments\n", + "\n", + "SELECT * FROM out ORDER BY dist,id;" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>vertex_table</th>\n", + " <th>vertex_id</th>\n", + " <th>edge_table</th>\n", + " <th>edge_args</th>\n", + " <th>source_vertex</th>\n", + " <th>out_table</th>\n", + " <th>max_distance</th>\n", + " <th>directed</th>\n", + " <th>grouping_cols</th>\n", + " </tr>\n", + " <tr>\n", + " <td>vertex</td>\n", + " <td>NULL</td>\n", + " <td>edge</td>\n", + " <td>NULL</td>\n", + " <td>3</td>\n", + " <td>out</td>\n", + " <td>None</td>\n", + " <td>None</td>\n", + " <td>NULL</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(u'vertex', u'NULL', u'edge', u'NULL', 3, u'out', None, None, u'NULL')]" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql\n", + "SELECT * FROM out_summary;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3. Use max_distance to limit the search distance" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "1 rows affected.\n", + "6 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>id</th>\n", + " <th>dist</th>\n", + " <th>parent</th>\n", + " </tr>\n", + " <tr>\n", + " <td>3</td>\n", + " <td>0</td>\n", + " <td>None</td>\n", + " </tr>\n", + " <tr>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <td>5</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>4</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(3, 0, None), (1, 1, 3), (4, 1, 3), (5, 1, 3), (0, 2, 1), (2, 2, 4)]" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql\n", + "DROP TABLE IF EXISTS out_max, out_max_summary;\n", + "\n", + "SELECT madlib.graph_bfs(\n", + " 'vertex', -- Vertex table\n", + " NULL, -- Vertix id column (NULL means use default naming)\n", + " 'edge', -- Edge table\n", + " NULL, -- Edge arguments (NULL means use default naming)\n", + " 3, -- Source vertex for BFS\n", + " 'out_max', -- Output table of nodes reachable from source_vertex\n", + " 2); -- Maximum distance to traverse from source_vertex \n", + " -- Default values used for the other arguments\n", + " \n", + "SELECT * FROM out_max ORDER BY dist,id;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 4. Use different column naming\n", + "\n", + "Now let's do an example using different column names in the tables (i.e., not the defaults). Create the vertex and edge tables:" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "12 rows affected.\n", + "11 rows affected.\n", + "11 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>n1</th>\n", + " <th>n2</th>\n", + " </tr>\n", + " <tr>\n", + " <td>0</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <td>2</td>\n", + " <td>6</td>\n", + " </tr>\n", + " <tr>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <td>3</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <td>4</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>8</td>\n", + " <td>9</td>\n", + " </tr>\n", + " <tr>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <td>9</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <td>10</td>\n", + " <td>8</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(0, 5),\n", + " (1, 0),\n", + " (1, 3),\n", + " (2, 6),\n", + " (3, 4),\n", + " (3, 5),\n", + " (4, 2),\n", + " (8, 9),\n", + " (9, 10),\n", + " (9, 11),\n", + " (10, 8)]" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql\n", + "DROP TABLE IF EXISTS vertex_alt, edge_alt;\n", + "CREATE TABLE vertex_alt AS SELECT id AS v_id FROM vertex;\n", + "CREATE TABLE edge_alt AS SELECT src AS n1, dest AS n2 FROM edge;\n", + "SELECT * FROM edge_alt ORDER BY n1, n2;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 5. Run BFS from vertex 8" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "1 rows affected.\n", + "4 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>v_id</th>\n", + " <th>dist</th>\n", + " <th>parent</th>\n", + " </tr>\n", + " <tr>\n", + " <td>8</td>\n", + " <td>0</td>\n", + " <td>None</td>\n", + " </tr>\n", + " <tr>\n", + " <td>9</td>\n", + " <td>1</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <td>10</td>\n", + " <td>1</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <td>11</td>\n", + " <td>2</td>\n", + " <td>9</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(8, 0, None), (9, 1, 8), (10, 1, 8), (11, 2, 9)]" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql\n", + "DROP TABLE IF EXISTS out_alt, out_alt_summary;\n", + "\n", + "SELECT madlib.graph_bfs(\n", + " 'vertex_alt', -- Vertex table\n", + " 'v_id', -- Vertex id column (NULL means use default naming)\n", + " 'edge_alt', -- Edge table\n", + " 'src=n1, dest=n2', -- Edge arguments (NULL means use default naming)\n", + " 8, -- Source vertex for BFS\n", + " 'out_alt'); -- Output table of nodes reachable from source_vertex\n", + "\n", + "SELECT * FROM out_alt ORDER BY v_id;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 6. Directed graph\n", + "\n", + "Now we show an example where the graph is treated as a directed graph." + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "1 rows affected.\n", + "4 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>v_id</th>\n", + " <th>dist</th>\n", + " <th>parent</th>\n", + " </tr>\n", + " <tr>\n", + " <td>8</td>\n", + " <td>0</td>\n", + " <td>None</td>\n", + " </tr>\n", + " <tr>\n", + " <td>9</td>\n", + " <td>1</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <td>10</td>\n", + " <td>2</td>\n", + " <td>9</td>\n", + " </tr>\n", + " <tr>\n", + " <td>11</td>\n", + " <td>2</td>\n", + " <td>9</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(8, 0, None), (9, 1, 8), (10, 2, 9), (11, 2, 9)]" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql\n", + "DROP TABLE IF EXISTS out_alt_dir, out_alt_dir_summary;\n", + "\n", + "SELECT madlib.graph_bfs(\n", + " 'vertex_alt', -- Vertex table\n", + " 'v_id', -- Vertex id column (NULL means use default naming)\n", + " 'edge_alt', -- Edge table\n", + " 'src=n1, dest=n2', -- Edge arguments (NULL means use default naming)\n", + " 8, -- Source vertex for BFS\n", + " 'out_alt_dir', -- Output table of nodes reachable from source_vertex\n", + " NULL, -- Maximum distance to traverse from source_vertex\n", + " TRUE); -- Flag for specifying directed graph\n", + "\n", + "SELECT * FROM out_alt_dir ORDER BY v_id;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that, with the graph being treated as directed, the parent of v_id=10 is now vertex 9 and not 8 as in the undirected case." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 7. Grouping\n", + "\n", + "Create a graph with 2 groups." + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "Done.\n", + "15 rows affected.\n", + "15 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>g1</th>\n", + " <th>g2</th>\n", + " <th>src</th>\n", + " <th>dest</th>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>2</td>\n", + " <td>6</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>3</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>4</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>8</td>\n", + " <td>9</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>9</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>10</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>0</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <td>202</td>\n", + " <td>c</td>\n", + " <td>9</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <td>202</td>\n", + " <td>c</td>\n", + " <td>9</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <td>202</td>\n", + " <td>c</td>\n", + " <td>10</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <td>202</td>\n", + " <td>c</td>\n", + " <td>8</td>\n", + " <td>9</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(100, u'a', 1, 0),\n", + " (100, u'a', 1, 3),\n", + " (100, u'a', 2, 6),\n", + " (100, u'a', 3, 4),\n", + " (100, u'a', 3, 5),\n", + " (100, u'a', 4, 2),\n", + " (100, u'a', 8, 9),\n", + " (100, u'a', 9, 10),\n", + " (100, u'a', 9, 11),\n", + " (100, u'a', 10, 8),\n", + " (100, u'a', 0, 5),\n", + " (202, u'c', 9, 10),\n", + " (202, u'c', 9, 11),\n", + " (202, u'c', 10, 8),\n", + " (202, u'c', 8, 9)]" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql\n", + "DROP TABLE IF EXISTS edge_gr;\n", + "\n", + "CREATE TABLE edge_gr(\n", + " g1 INTEGER,\n", + " g2 TEXT,\n", + " src INTEGER,\n", + " dest INTEGER\n", + " );\n", + "\n", + "INSERT INTO edge_gr VALUES\n", + "(100, 'a', 0, 5),\n", + "(100, 'a', 1, 0),\n", + "(100, 'a', 1, 3),\n", + "(100, 'a', 2, 6),\n", + "(100, 'a', 3, 4),\n", + "(100, 'a', 3, 5),\n", + "(100, 'a', 4, 2),\n", + "(100, 'a', 8, 9),\n", + "(100, 'a', 9, 10),\n", + "(100, 'a', 9, 11),\n", + "(100, 'a', 10, 8),\n", + "(202, 'c', 8, 9),\n", + "(202, 'c', 9, 10),\n", + "(202, 'c', 9, 11),\n", + "(202, 'c', 10, 8);\n", + "\n", + "SELECT * FROM edge_gr ORDER BY g1, g2;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 8. Run BFS for all groups" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "1 rows affected.\n", + "8 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>g1</th>\n", + " <th>g2</th>\n", + " <th>id</th>\n", + " <th>dist</th>\n", + " <th>parent</th>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>8</td>\n", + " <td>0</td>\n", + " <td>None</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>9</td>\n", + " <td>1</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>10</td>\n", + " <td>1</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>11</td>\n", + " <td>2</td>\n", + " <td>9</td>\n", + " </tr>\n", + " <tr>\n", + " <td>202</td>\n", + " <td>c</td>\n", + " <td>8</td>\n", + " <td>0</td>\n", + " <td>None</td>\n", + " </tr>\n", + " <tr>\n", + " <td>202</td>\n", + " <td>c</td>\n", + " <td>9</td>\n", + " <td>1</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <td>202</td>\n", + " <td>c</td>\n", + " <td>10</td>\n", + " <td>1</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <td>202</td>\n", + " <td>c</td>\n", + " <td>11</td>\n", + " <td>2</td>\n", + " <td>9</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(100, u'a', 8, 0, None),\n", + " (100, u'a', 9, 1, 8),\n", + " (100, u'a', 10, 1, 8),\n", + " (100, u'a', 11, 2, 9),\n", + " (202, u'c', 8, 0, None),\n", + " (202, u'c', 9, 1, 8),\n", + " (202, u'c', 10, 1, 8),\n", + " (202, u'c', 11, 2, 9)]" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql\n", + "DROP TABLE IF EXISTS out_gr, out_gr_summary;\n", + "\n", + "SELECT madlib.graph_bfs(\n", + " 'vertex', -- Vertex table\n", + " NULL, -- Vertex id column (NULL means use default naming)\n", + " 'edge_gr', -- Edge table\n", + " NULL, -- Edge arguments (NULL means use default naming)\n", + " 8, -- Source vertex for BFS\n", + " 'out_gr', -- Output table of nodes reachable from source_vertex\n", + " NULL, -- Maximum distance to traverse from source_vertex\n", + " NULL, -- Flag for specifying directed graph\n", + " 'g1,g2' -- Grouping columns\n", + ");\n", + "\n", + "SELECT * FROM out_gr ORDER BY g1,g2,dist,id;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If source_vertex is not present in a group, then that group will not appear in the output table:" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "1 rows affected.\n", + "7 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>g1</th>\n", + " <th>g2</th>\n", + " <th>id</th>\n", + " <th>dist</th>\n", + " <th>parent</th>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>3</td>\n", + " <td>0</td>\n", + " <td>None</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>4</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>5</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <td>100</td>\n", + " <td>a</td>\n", + " <td>6</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(100, u'a', 3, 0, None),\n", + " (100, u'a', 1, 1, 3),\n", + " (100, u'a', 4, 1, 3),\n", + " (100, u'a', 5, 1, 3),\n", + " (100, u'a', 0, 2, 1),\n", + " (100, u'a', 2, 2, 4),\n", + " (100, u'a', 6, 3, 2)]" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql\n", + "DROP TABLE IF EXISTS out_gr, out_gr_summary;\n", + "\n", + "SELECT madlib.graph_bfs(\n", + " 'vertex', -- Vertex table\n", + " NULL, -- Vertex id column (NULL means use default naming)\n", + " 'edge_gr', -- Edge table\n", + " NULL, -- Edge arguments (NULL means use default naming)\n", + " 3, -- Source vertex for BFS\n", + " 'out_gr', -- Output table of nodes reachable from source_vertex\n", + " NULL, -- Maximum distance to traverse from source_vertex\n", + " NULL, -- Flag for specifying directed graph\n", + " 'g1,g2' -- Grouping columns\n", + ");\n", + "\n", + "SELECT * FROM out_gr ORDER BY g1,g2,dist,id;" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}