Repository: incubator-madlib-site
Updated Branches:
  refs/heads/asf-site d1a3327ed -> 86869127c


http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/86869127/community-artifacts/Sessionize-demo-2.ipynb
----------------------------------------------------------------------
diff --git a/community-artifacts/Sessionize-demo-2.ipynb 
b/community-artifacts/Sessionize-demo-2.ipynb
new file mode 100644
index 0000000..5d30d9f
--- /dev/null
+++ b/community-artifacts/Sessionize-demo-2.ipynb
@@ -0,0 +1,615 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Sessionize\n",
+    "\n",
+    "The MADlib sessionize function performs time-oriented session 
reconstruction on a data set comprising a sequence of events. A defined period 
of inactivity indicates the end of one session and beginning of the next 
session."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The sql extension is already loaded. To reload it, use:\n",
+      "  %reload_ext sql\n"
+     ]
+    }
+   ],
+   "source": [
+    "%load_ext sql"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "u'Connected: fmcquillan@madlib'"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# %sql postgresql://gpdbchina@10.194.10.68:55000/madlib\n",
+    "%sql postgresql://fmcquillan@localhost:5432/madlib"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>version</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>MADlib version: 1.9.1, git revision: 
rc/v1.9-rc1-39-g1929aed, cmake configuration time: Tue Aug 30 00:17:02 UTC 
2016, build type: RelWithDebInfo, build system: Darwin-14.5.0, C compiler: 
Clang, C++ compiler: Clang</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(u'MADlib version: 1.9.1, git revision: rc/v1.9-rc1-39-g1929aed, 
cmake configuration time: Tue Aug 30 00:17:02 UTC 2016, build type: 
RelWithDebInfo, build system: Darwin-14.5.0, C compiler: Clang, C++ compiler: 
Clang',)]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%sql select madlib.version();"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": true
+   },
+   "source": [
+    "The data set describes shopper behavior on a notional web site that sells 
beer and wine. A beacon fires an event to a log file when the shopper visits 
different pages on the site: landing page, beer selection page, wine selection 
page, and checkout. Each user is identified by a a user id, and every time a 
page is visited, the page and time stamp are logged.\n",
+    "\n",
+    "Create the data table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "Done.\n",
+      "21 rows affected.\n",
+      "21 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>event_timestamp</th>\n",
+       "        <th>user_id</th>\n",
+       "        <th>page</th>\n",
+       "        <th>revenue</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:03:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:04:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:05:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:05:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>39.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:06:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>HELP</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:09:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:06:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:09:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:15:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:15:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:16:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:16:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>BEER</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:17:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:17:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>HELP</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:18:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:19:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>16.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:29:00</td>\n",
+       "        <td>201881</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:30:00</td>\n",
+       "        <td>201881</td>\n",
+       "        <td>BEER</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 03:18:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>BEER</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 03:19:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 03:22:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>21.0</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(datetime.datetime(2015, 4, 15, 1, 3), 100821, u'LANDING', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 4), 100821, u'WINE', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 5), 202201, u'LANDING', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 5), 100821, u'CHECKOUT', 39.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 6), 202201, u'HELP', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 9), 202201, u'LANDING', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 6), 100821, u'WINE', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 9), 100821, u'WINE', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 15), 101331, u'LANDING', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 15), 202201, u'WINE', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 16), 101331, u'WINE', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 16), 202201, u'BEER', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 17), 202201, u'WINE', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 17), 101331, u'HELP', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 18), 101331, u'WINE', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 19), 101331, u'CHECKOUT', 
16.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 29), 201881, u'LANDING', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 30), 201881, u'BEER', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 3, 18), 202201, u'BEER', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 3, 19), 202201, u'WINE', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 3, 22), 202201, u'CHECKOUT', 21.0)]"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql \n",
+    "DROP TABLE IF EXISTS eventlog CASCADE; -- Use CASCADE because views 
created below depend on this table\n",
+    "\n",
+    "CREATE TABLE eventlog (event_timestamp TIMESTAMP,\n",
+    "            user_id INT,\n",
+    "            page TEXT,\n",
+    "            revenue FLOAT);\n",
+    "\n",
+    "INSERT INTO eventlog VALUES\n",
+    "('04/15/2015 02:19:00', 101331, 'CHECKOUT', 16),\n",
+    "('04/15/2015 02:17:00', 202201, 'WINE', 0),\n",
+    "('04/15/2015 03:18:00', 202201, 'BEER', 0),\n",
+    "('04/15/2015 01:03:00', 100821, 'LANDING', 0),\n",
+    "('04/15/2015 01:04:00', 100821, 'WINE', 0),\n",
+    "('04/15/2015 01:05:00', 100821, 'CHECKOUT', 39),\n",
+    "('04/15/2015 02:06:00', 100821, 'WINE', 0),\n",
+    "('04/15/2015 02:09:00', 100821, 'WINE', 0),\n",
+    "('04/15/2015 02:15:00', 101331, 'LANDING', 0),\n",
+    "('04/15/2015 02:16:00', 101331, 'WINE', 0),\n",
+    "('04/15/2015 02:17:00', 101331, 'HELP', 0),\n",
+    "('04/15/2015 02:18:00', 101331, 'WINE', 0),\n",
+    "('04/15/2015 02:29:00', 201881, 'LANDING', 0),\n",
+    "('04/15/2015 02:30:00', 201881, 'BEER', 0),\n",
+    "('04/15/2015 01:05:00', 202201, 'LANDING', 0),\n",
+    "('04/15/2015 01:06:00', 202201, 'HELP', 0),\n",
+    "('04/15/2015 01:09:00', 202201, 'LANDING', 0),\n",
+    "('04/15/2015 02:15:00', 202201, 'WINE', 0),\n",
+    "('04/15/2015 02:16:00', 202201, 'BEER', 0),\n",
+    "('04/15/2015 03:19:00', 202201, 'WINE', 0),\n",
+    "('04/15/2015 03:22:00', 202201, 'CHECKOUT', 21);\n",
+    "\n",
+    "SELECT * FROM eventlog ORDER BY event_timestamp;\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Sessionize the table by each user_id:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "21 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>event_timestamp</th>\n",
+       "        <th>user_id</th>\n",
+       "        <th>page</th>\n",
+       "        <th>revenue</th>\n",
+       "        <th>session_id</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:03:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:04:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:05:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>39.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:06:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:09:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:15:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:16:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:17:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>HELP</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:18:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:19:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>16.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:29:00</td>\n",
+       "        <td>201881</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:30:00</td>\n",
+       "        <td>201881</td>\n",
+       "        <td>BEER</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:05:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:06:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>HELP</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:09:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:15:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:16:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>BEER</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:17:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 03:18:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>BEER</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 03:19:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 03:22:00</td>\n",
+       "        <td>202201</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>21.0</td>\n",
+       "        <td>3</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(datetime.datetime(2015, 4, 15, 1, 3), 100821, u'LANDING', 0.0, 
1L),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 4), 100821, u'WINE', 0.0, 1L),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 5), 100821, u'CHECKOUT', 39.0, 
1L),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 6), 100821, u'WINE', 0.0, 2L),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 9), 100821, u'WINE', 0.0, 2L),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 15), 101331, u'LANDING', 0.0, 
1L),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 16), 101331, u'WINE', 0.0, 1L),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 17), 101331, u'HELP', 0.0, 1L),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 18), 101331, u'WINE', 0.0, 1L),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 19), 101331, u'CHECKOUT', 16.0, 
1L),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 29), 201881, u'LANDING', 0.0, 
1L),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 30), 201881, u'BEER', 0.0, 1L),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 5), 202201, u'LANDING', 0.0, 
1L),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 6), 202201, u'HELP', 0.0, 1L),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 9), 202201, u'LANDING', 0.0, 
1L),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 15), 202201, u'WINE', 0.0, 2L),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 16), 202201, u'BEER', 0.0, 2L),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 17), 202201, u'WINE', 0.0, 2L),\n",
+       " (datetime.datetime(2015, 4, 15, 3, 18), 202201, u'BEER', 0.0, 3L),\n",
+       " (datetime.datetime(2015, 4, 15, 3, 19), 202201, u'WINE', 0.0, 3L),\n",
+       " (datetime.datetime(2015, 4, 15, 3, 22), 202201, u'CHECKOUT', 21.0, 
3L)]"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP VIEW IF EXISTS sessionize_output_view;\n",
+    "\n",
+    " SELECT madlib.sessionize(\n",
+    "     'eventlog',             -- Name of input table\n",
+    "     'sessionize_output_view',   -- View to store sessionize results\n",
+    "      'user_id',             -- Partition input table by user id\n",
+    "     'event_timestamp',      -- Time column used to compute sessions\n",
+    "     '0:30:0'                -- Time out used to define a session (30 
minutes)\n",
+    "    );\n",
+    "    \n",
+    "SELECT * FROM sessionize_output_view ORDER BY user_id, event_timestamp;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now let's say we want to see 3 minute sessions by a group of users with a 
certain range of user IDs. To do this, we need to sessionize the table based on 
a partition expression. Also, we want to persist a table output with a reduced 
set of columns in the table."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS sessionize_output_table;\n",
+    "\n",
+    " SELECT madlib.sessionize(\n",
+    "     'eventlog',                    -- Name of input table\n",
+    "     'sessionize_output_table',     -- Table to store sessionize 
results\n",
+    "     'user_id < 200000',            -- Partition input table by subset of 
users\n",
+    "     'event_timestamp',             -- Order partitions in input table by 
time\n",
+    "     '180',                         -- Use 180 second time out to define 
sessions\n",
+    "                                    -- Note that this is the same as 
'0:03:0'\n",
+    "     'event_timestamp, user_id, user_id < 200000 AS \"Department-A1\"',   
 -- Select only user_id and event_timestamp columns, along with the session id 
as output\n",
+    "     'f'                            -- create a table\n",
+    "     );\n",
+    "    \n",
+    "    SELECT * FROM sessionize_output_table WHERE \"Department-A1\"='TRUE' 
ORDER BY event_timestamp;"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}

Reply via email to