Repository: incubator-madlib-site Updated Branches: refs/heads/asf-site e1b973ae4 -> 53c36d3d3
add weakly connected components workbook Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/commit/53c36d3d Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/tree/53c36d3d Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/diff/53c36d3d Branch: refs/heads/asf-site Commit: 53c36d3d3b6a74300d5cbf5805caa4ce561f345e Parents: e1b973a Author: Frank McQuillan <fmcquil...@pivotal.io> Authored: Thu Jul 6 16:31:31 2017 -0700 Committer: Frank McQuillan <fmcquil...@pivotal.io> Committed: Thu Jul 6 16:31:31 2017 -0700 ---------------------------------------------------------------------- .../Weakly-connected-cpts-v1.ipynb | 584 +++++++++++++++++++ 1 file changed, 584 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/53c36d3d/community-artifacts/Weakly-connected-cpts-v1.ipynb ---------------------------------------------------------------------- diff --git a/community-artifacts/Weakly-connected-cpts-v1.ipynb b/community-artifacts/Weakly-connected-cpts-v1.ipynb new file mode 100644 index 0000000..813692b --- /dev/null +++ b/community-artifacts/Weakly-connected-cpts-v1.ipynb @@ -0,0 +1,584 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Weakly connected components\n", + "\n", + "Weakly connected components was added in MADlib 1.12." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The sql extension is already loaded. To reload it, use:\n", + " %reload_ext sql\n" + ] + } + ], + "source": [ + "%load_ext sql" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "u'Connected: gpdbchina@madlib'" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Greenplum 4.3.10.0\n", + "%sql postgresql://gpdbchina@10.194.10.68:61000/madlib\n", + " \n", + "# PostgreSQL local\n", + "#%sql postgresql://fmcquillan@localhost:5432/madlib\n", + "\n", + "# Greenplum 4.2.3.0\n", + "#%sql postgresql://gpdbchina@10.194.10.68:55000/madlib" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>version</th>\n", + " </tr>\n", + " <tr>\n", + " <td>MADlib version: 1.12-dev, git revision: rc/v1.9alpha-rc1-188-ge47dc8a, cmake configuration time: Wed Jul 5 18:23:50 UTC 2017, build type: Release, build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: g++ 4.4.0</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(u'MADlib version: 1.12-dev, git revision: rc/v1.9alpha-rc1-188-ge47dc8a, cmake configuration time: Wed Jul 5 18:23:50 UTC 2017, build type: Release, build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: g++ 4.4.0',)]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%sql select madlib.version();\n", + "#%sql select version();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1. Create vertex and edge tables" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "Done.\n", + "Done.\n", + "14 rows affected.\n", + "18 rows affected.\n", + "18 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>src</th>\n", + " <th>dest</th>\n", + " <th>user_id</th>\n", + " </tr>\n", + " <tr>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>2</td>\n", + " <td>6</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>3</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>5</td>\n", + " <td>6</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>6</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>10</td>\n", + " <td>12</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>11</td>\n", + " <td>12</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>11</td>\n", + " <td>13</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>12</td>\n", + " <td>13</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>13</td>\n", + " <td>10</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>15</td>\n", + " <td>14</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>15</td>\n", + " <td>16</td>\n", + " <td>2</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(0, 1, 1),\n", + " (0, 2, 1),\n", + " (1, 2, 1),\n", + " (1, 3, 1),\n", + " (2, 3, 1),\n", + " (2, 5, 1),\n", + " (2, 6, 1),\n", + " (3, 0, 1),\n", + " (5, 6, 1),\n", + " (6, 3, 1),\n", + " (10, 11, 2),\n", + " (10, 12, 2),\n", + " (11, 12, 2),\n", + " (11, 13, 2),\n", + " (12, 13, 2),\n", + " (13, 10, 2),\n", + " (15, 14, 2),\n", + " (15, 16, 2)]" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql \n", + "DROP TABLE IF EXISTS vertex, edge;\n", + "\n", + "CREATE TABLE vertex(\n", + " id INTEGER\n", + ");\n", + "\n", + "CREATE TABLE edge(\n", + " src INTEGER,\n", + " dest INTEGER,\n", + " user_id INTEGER\n", + ");\n", + "\n", + "INSERT INTO vertex VALUES\n", + "(0),\n", + "(1),\n", + "(2),\n", + "(3),\n", + "(4),\n", + "(5),\n", + "(6),\n", + "(10),\n", + "(11),\n", + "(12),\n", + "(13),\n", + "(14),\n", + "(15),\n", + "(16);\n", + "\n", + "INSERT INTO edge VALUES\n", + "(0, 1, 1),\n", + "(0, 2, 1),\n", + "(1, 2, 1),\n", + "(1, 3, 1),\n", + "(2, 3, 1),\n", + "(2, 5, 1),\n", + "(2, 6, 1),\n", + "(3, 0, 1),\n", + "(5, 6, 1),\n", + "(6, 3, 1),\n", + "(10, 11, 2),\n", + "(10, 12, 2),\n", + "(11, 12, 2),\n", + "(11, 13, 2),\n", + "(12, 13, 2),\n", + "(13, 10, 2),\n", + "(15, 16, 2),\n", + "(15, 14, 2);\n", + "\n", + "SELECT * FROM edge ORDER BY src, dest;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2. Find all weakly connected components" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "1 rows affected.\n", + "14 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>id</th>\n", + " <th>component_id</th>\n", + " </tr>\n", + " <tr>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <td>3</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <td>5</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <td>6</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <td>4</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <td>10</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <td>11</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <td>12</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <td>13</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <td>14</td>\n", + " <td>14</td>\n", + " </tr>\n", + " <tr>\n", + " <td>15</td>\n", + " <td>14</td>\n", + " </tr>\n", + " <tr>\n", + " <td>16</td>\n", + " <td>14</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(0, 0),\n", + " (1, 0),\n", + " (2, 0),\n", + " (3, 0),\n", + " (5, 0),\n", + " (6, 0),\n", + " (4, 4),\n", + " (10, 10),\n", + " (11, 10),\n", + " (12, 10),\n", + " (13, 10),\n", + " (14, 14),\n", + " (15, 14),\n", + " (16, 14)]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql\n", + "DROP TABLE IF EXISTS wcc_out;\n", + "\n", + "SELECT madlib.weakly_connected_components(\n", + " 'vertex', -- Vertex table\n", + " 'id', -- Vertix id column\n", + " 'edge', -- Edge table\n", + " 'src=src, dest=dest', -- Comma delimted string of edge arguments\n", + " 'wcc_out'); -- Output table of weakly connected components\n", + "\n", + "SELECT * FROM wcc_out ORDER BY component_id, id;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3. Weakly connected components with grouping" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "1 rows affected.\n", + "13 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "<table>\n", + " <tr>\n", + " <th>id</th>\n", + " <th>component_id</th>\n", + " <th>user_id</th>\n", + " </tr>\n", + " <tr>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>3</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>5</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>6</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <td>10</td>\n", + " <td>10</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>11</td>\n", + " <td>10</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>12</td>\n", + " <td>10</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>13</td>\n", + " <td>10</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>14</td>\n", + " <td>14</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>15</td>\n", + " <td>14</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <td>16</td>\n", + " <td>14</td>\n", + " <td>2</td>\n", + " </tr>\n", + "</table>" + ], + "text/plain": [ + "[(0, 0, 1),\n", + " (1, 0, 1),\n", + " (2, 0, 1),\n", + " (3, 0, 1),\n", + " (5, 0, 1),\n", + " (6, 0, 1),\n", + " (10, 10, 2),\n", + " (11, 10, 2),\n", + " (12, 10, 2),\n", + " (13, 10, 2),\n", + " (14, 14, 2),\n", + " (15, 14, 2),\n", + " (16, 14, 2)]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql\n", + "DROP TABLE IF EXISTS wcc_out, wcc_out_summary;\n", + "\n", + "SELECT madlib.weakly_connected_components(\n", + " 'vertex', -- Vertex table\n", + " 'id', -- Vertix id column\n", + " 'edge', -- Edge table\n", + " 'src=src, dest=dest', -- Comma delimted string of edge arguments\n", + " 'wcc_out', -- Output table of weakly connected components\n", + " 'user_id'); -- Grouping column name\n", + "\n", + "SELECT * FROM wcc_out ORDER BY component_id, id, user_id;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that vertex '4' is not identified as a separate component in the above result. This is because disconnected nodes cannot be assigned to a particular group with the current graph representation in MADlib." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}