add 1.9.1 examples and recent PivotalR examples

Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/commit/86869127
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/tree/86869127
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/diff/86869127

Branch: refs/heads/asf-site
Commit: 86869127c808462e40a9d31f109ff05625a2565a
Parents: d1a3327
Author: Frank McQuillan <fmcquil...@pivotal.io>
Authored: Thu Dec 1 12:02:47 2016 -0800
Committer: Frank McQuillan <fmcquil...@pivotal.io>
Committed: Thu Dec 1 12:02:47 2016 -0800

----------------------------------------------------------------------
 .../Novelty-detection-demo-1.ipynb              |  478 +++++
 community-artifacts/Path-demo-4.ipynb           |  908 +++++++++
 community-artifacts/Pivot-demo-2.ipynb          | 1818 ++++++++++++++++++
 community-artifacts/PivotaR-demo-nov-2016.R     |  133 ++
 .../Prediction-metrics-demo-1.ipynb             |  343 ++++
 community-artifacts/Sessionize-demo-2.ipynb     |  615 ++++++
 6 files changed, 4295 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/86869127/community-artifacts/Novelty-detection-demo-1.ipynb
----------------------------------------------------------------------
diff --git a/community-artifacts/Novelty-detection-demo-1.ipynb 
b/community-artifacts/Novelty-detection-demo-1.ipynb
new file mode 100755
index 0000000..563bda4
--- /dev/null
+++ b/community-artifacts/Novelty-detection-demo-1.ipynb
@@ -0,0 +1,478 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Novelty detection using 1-class SVM\n",
+    "\n",
+    "Classifies new data as similar or different to the training set.  This 
method is an unsupervised method that builds a decision boundary between the 
data and origin in kernel space and can be used as a novelty detector."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The sql extension is already loaded. To reload it, use:\n",
+      "  %reload_ext sql\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Setup\n",
+    "%load_ext sql\n",
+    "# %sql postgresql://gpdbchina@10.194.10.68:55000/madlib\n",
+    "%sql postgresql://fmcquillan@localhost:5432/madlib\n",
+    "%matplotlib inline\n",
+    "\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib.font_manager"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAW8AAAD7CAYAAAClvBX1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHdNJREFUeJzt3W9wXNWZ5/HvkWWRTrADso2J48SAnRk2hICMijJFaqVN\n0mqGqWhG0hvCwDSwi3Zq+WOsNiiOCHFheRUnESQwM8WYsEhhirCVYTUjZid9LZKSqkSF7LA2lJeB\nAHaGTUIYYpydGOiJsHX2xbndarW69cfq7tu3+/ep6qL76va9R23z+PRznnOOsdYiIiLhUhd0A0RE\nZOkUvEVEQkjBW0QkhBS8RURCSMFbRCSEFLxFREKovlw3MsaoJlFE5DRYa03usbL2vK21gT6++tWv\nBt6GSnnos9Bnoc8iHJ9FIUqbiIiEkIK3iEgI1VTwbm1tDboJFUOfxQx9FjP0Wcyo9M/CzJdTKeqN\njLHlupeISLUwxmCDHrAUEZHiUPAWEQkhBW8RkRBS8BYRCSEFbxGREFLwFhEJIQVvEZEQUvAWEQkh\nBW8RkRAKffD2PI+utja62trwPC/o5oiIlEWop8d7nke8o4N9qRQAvZEIwyMjxGKxot5HRCQohabH\nhzp4d7W10T42Rtx/PQyMRqM8eeBAUe8jIhKUkq9tYoypM8YcNMaMFuuaIiKSXzG3QdsO/BOwuojX\nnFd3IkF8chKy0yaJRLluLyISmKL0vI0xG4Grge8U43qLFYvFGB4ZYTQaZTQaVb5bQk2D77IURcl5\nG2O+D+wFPgwkrLXtec7Ret4iBWjwXQoplPNedtrEGPOHwL9Ya583xrQCc26Stnv37szz1tbWit+p\nQqRc9g8Osi+Vygy+k0qxf3BQwbsGjY+PMz4+vuB5y+55G2P+K3AdcBKIAKuA/2Gt/dOc89TzFilA\nlVNSSFlKBY0xLShtIrJkSptIISVLm4jI8qUH3/cPD
 
gIwnEgocMu8Qj1JR0Sk2mkDYhGRKqLgLSIS\nQgreIiIhpOAtIhJCCt4iIiGk4C0iUkKlWrNGpYIiIiVSjMlXVbkZg4hIJSvGsgeq8xYRqSKaHi8i\nUiKl3DBGaRMRkRLyPC+zZk33aaxZo5y3iEgIKectIlJFFLxFpCy0R2dxKW0iIiWnzSZOn3LeIhIY\nbfN2+pTzFhE8z6OtrYu2ti6lLkJOdd4iNcLzPDo64qRS+wCYnIwzMjJcltRFKeuda5XSJiI1oq2t\ni7GxdshKXkSjoxw48GRZ7r/ceudapQ2IRSQvz/MYHNwPQCLRXbKgGovFFLCLSMFbpEYkEt1MTsbT\nmQsikV5aWm4LLJUiy6O0iUgNye1lDw7uDzSVIgtT2kRE5qQu0oFcwkelgiI1Zu/evaxZs4U1a7aw\nYcMqIpFeXOX1MJFIL4lEd9BNlEVQ2kSkhuzdu5e77/468IB/5Hbi8Q7eeOMEUNoBSzk9mmEpIqxZ\ns4Xjx79Cdo67sXEPb7/9WpDNknlohqWILJpmYlY+BW8JBa1IN7/FBtuenhuB20nnuOF2/9jsa3V0\nxBkba2dsrJ2Ojrg+80pkrS3Lw91KZOmSyaRdH4nYIbBDYNdHIjaZTAbdrIqRTCZtJLLewpCFIRuJ\nrC/4+SSTSbt58ydtff05dtWqj9v+/v4550Sjnf61rP8YstFoZ+b9TU0ttrFxs21qulJ/DmXgx865\nMTXfwVI8FLzldHVGo3ZoJpLYIbCd0WjQzaoY8wXbbPMF+WQyaaPRThuNdtqmppY511u16mM2Ho9b\nYxotbLOQsLDaGnO2bWpqURAvoULBW3XeIjVi164BfyalG6xMpWbqvLNnWTY03MGKFTs4dSr9zp2c\nOJFiePgJ4FbgYuAOoB5r7+PQIfd+zcwsL+W8peJ1JxJuFTpclrY3EqG7BlekK5TXTiS689Zq33DD\nDaxcuZ6VK9cTjUZ54YX/k74S
 
0AU8xNGjr3LttbeQSp0PnAucy9TURk6deh94CBgF/hr4C+BDwLP+\neWcCv+c/d4FfE37KLF93vBQPlDaRZUgmk7YzGrWd0WhNfkVfKK+dnfZIJpM2Ho9bWJ053z2/0sKZ\nFtbmHE/4z8+ysM5/vm1O6sQd+4iF9VnvX28hWTBVI8uHct4i4TVfXru/v982Nm62kci5NhJZZxsb\nN1tjPphzfsIPzp/KE5Q3+wE4O2Anc4L82f41zi0Q1FfnHfyU5SsUvJU2EQmx9IzJ48e/Qir1NVIp\ny/Hjf4y19cBTWWc+A3wLl+rItQ64BvhnXKrEA2K43PhOXH77s8AQkG+i3a+Bm5mYOFicX0oWZdkD\nlsaYjcB3gfXANPCwtfaB+d8lIkuRbznXRGKYa6+9BTfVPZ519qh/bAcuDw7wsv/f7qxzD+MC8hrg\nJHAecCVwLXAO8BbwH4CngQNAJ/ATYHvWvdK59jeBnxXjV5VFKkbP+yTQY629CLgCuMUYc2ERrisi\nvlgsxsiIW641Gh1dVGVHQ0MD9fV3UV9/F5//fLM/qPkmcB2uauQRYBD4MvABXOB+BNgAvAPcCEwC\n/wnYBPwt8CX/9Xagx7/Wm6e1oJVmcS5P0dc2Mcb8LfCgtfaHOcdtse8lUuvmLjS1E9ezfpj+/rsA\nuO++RwH4whc+k1mA6ujRoxw5cgfM2s/9YeCnwDf9Y7244PwM8Aug3z/fA3ZTV/cSH/rQarZsuYCB\ngV1LKhPM3U8zEulVqWEBZVnP2xhzHnAp7ruViJRYX18fAPfdt4dU6l3AUF//fc45ZyOPPvo4R478\nM/BpAIaHn2Dz5n/HBRdcwOrVH8650mHgVVzgzk7BfA2XPpn2z/H8n+9jehpOnNjJiy++sOR2Dw7u\nz1tzruC9eEUbsDTGnAn8DbDdWvtOsa4rIrPlphv6+vp4++3XeO+9X9HXt513332XI0dW+4G7Afgz\n/3EGR45
 
8jrGx8zl06HncQORO//EwcEHOnQ4DbwD34QY7H8b1xK/D5dVHgThTUxeqxjsARel5G2Pq\ncYH7MWvt3xU6b/fu3Znnra2ttLa2FuP2IjUjN92Qveek53ncc88g09P34wLvy7igey6wH/gk8ANc\ndci3/StuxwX4B0hPuJnxCHMHQ3v849/yX+8E1uLy5ItXaABWYHx8nPHx8YVPzFc/uNQHrtrkvgXO\nKWElpEhtmK/ee/bPOrPWIMmeVHN2njrtj+fUd2+zsLFATfjGOceMOfu0Jk7lTiyS/CjV2ibGmCuB\nPwEOG2MO4QpBv2ytTS732iKyVIdxU99fwA0/DeEqStK954fyvOd3uB502mu4KfHgUiRpO4Hfn/Pu\nSy/99GnlqnP305SlWXbwttY+A6woQltEZAG56Ya6uh20tLh1XlpatjI2lq48acet270u5wpX4nLd\naT24/tZqXAplFS7Qp4NqHEjgBixvAqJkp1EaGu5kYOCxov1+snhaVVAkRGKxGH19t3HPPQmmpz/B\n9PRN7N37IM3Nzf4Mx9k56jPP3MV77+1gejp9ZBjXgx7FDUb+zj9+CpcHz857P4PLm38ON4HnYdyK\ngtfhgv4G4P3MvTzPywxcpmu+s1+rl11cCt4iITMxcZDp6ZlUSCp1ccFqjyuuuIJEopvBwf08++xz\nnDgRB/r8nw7jeuH/ETcBB1yP+zZcyWB6ULIXuNl/3oObYv84EGNqajjvsrITE9cD7zM15a6RPbAq\nxaHgLVIlClVwpHPLW7ZcxIkT6d4zuLQK/uvsdMjf4wJ3dpXJflwq5oO4ssPZQTi3bntqClx+XXXc\npaKFqUQqXG5dd6H1uxeaQv+b3/wO14NO12jfDESoq9vBzLT5BHAkTytewa2Vcgaut+7uXVe3g0Si\nm2PH3i7dByB5FX16fMEbaXq8yJIVmkYOS88nb93ayqFDN5I9Jd6YHezZk2Dfvr/ixAmL
 
68/9q//z\n7Frw94G/9F/3AOdQV/dr7r13B83NzbS3X8PUVD3pqfX19Qnq6k5l0iaa/n76Ck2P13reIhVssftT\nLkYymbQNDenNFoasMWfZ/v7+rONddmYDh/T635ut28BhdhsaGzdnarNn2pjM1JenNydWHffyoT0s\nRWpbLBZjdPSxrB77E8RiMdraupia+gawh9nVKhezYsWdTE+vJPdL82WXXZKnFx3zH8OsXTuqOu4S\nU/AWqWDFnka+1IDqvrLfhKs4cVye+3sla6MsjnLeIhUut3662L3Zmbz6Z4AxZpaXvZ3Nmzdy5Mhd\nzKyP8gZNTSs4eHCyrG2sZYVy3greIpIJvkePvsxbb73DypUr6em5kebmZq27HTAFbxE5LepVB0vB\nW0QkhAoFb03SqXGe59HV1kZXW5v2ERQJEfW8a5jnecQ7Otjnlwn0RiIMj4zoa7FIBVHaROboamuj\nfWxs1ha0o9EoTx44EGSzRCSL0iYiIlVEk3RqWHciQXxyErLTJolEwK0SkcVQ2qTGeZ7H/sFBwAVz\n5btFKoty3iIiIaSct4hIFVHwFhEJIQVvEZEQUvAWEQkhBW8RkRBS8BYRCSEFbxGREFLwFhEJIQVv\nEZEQUvAWEQkhBW8RkRBS8BYRCSEFbxGREFLwFhEJIQVvEZEQUvAWEQkhBW8RkRAqSvA2xlxljHnZ\nGPOKMaa3GNcUEZHClr0NmjGmDngF+BzwBvCPwDXW2pdzztM2aCIiS1TKbdAuB1611r5urX0feAL4\noyJcV0RECihG8P4o8POs17/wj4mISInUl/Nmu3fvzjxvbW2ltbW1nLcXEal44+PjjI+PL3heMXLe\n24Dd1tqr/NdfAqy1dl/Oecp5i4gsUSlz3v8IbDHGbDLGNADXAKNFuK6IiBSw7LSJtfaUMeZW4ADu\nH4NHrLUvLbtlIiJS0LLTJou+kdImIiJLVsq0iSyC53l0tbXR1da
 
G53lBN0dEQk497zLwPI94Rwf7\nUikAeiMRhkdGiMViAbdMRCpdoZ63gncZdLW10T42Rtx/PQyMRqM8eeBAkM0SkRBQ2kREpIqUdZJO\nrepOJIhPTkJ22iSRCLhVIhJmSpuUied57B8cBFwwV75bRBZDOW8RkRBSzltEpIooeIuIhJCCt4hI\nCCl4i0hRaBZxeWnAUkSWTbOIS0fVJiJSMppFXDqhrzbRVzIRkRmhmGGZ+5UsPjmpr2QiFUSziMsv\nFGkTfSUTqXyaRVwahdImoeh5i0jli8ViCthlFIqcd3ci4b6G4XrdvZEI3fpKNi/P82hr66KtrWvR\nYwQaVxAJj1CkTUBfyZbC8zw6OuKkUvsAiER6GRkZnvczU6mXSGVSqWANaWvrYmysHbJGCaLRURKJ\nbgYH9wOQSHTPCswaVxCpTMp517hjx96e1RufnIwv2BsXkcql4F2FEoluJifj6aotIpFeYIsfuF3f\nOpWCwcH9meCtUi+RcAnFgKUsTSwWY2TEpUqi0VFGRoZZu3b9gu8ZHhlhNBplNBpVvlukwinnXcU8\nz8vkuFtatrJ374NLGsQUkeBpwLLGeJ5He/v1TE19A4CGhju5557tTEwcBOYOWIpIZVLwrjFbt7Zy\n6NCNZFecNDU9ysGD4wG2SkSWKvQLU8nSvP76LxZ17HQm84hI8BS8Q2axwXbTpnOBnZCZl7rTPzb7\nWh0dccbG2hkba6ejI64ALsuiWbplZK0ty8PdSpYjmUzaSGS9hSELQzYSWW+TyWTec/v7+60xZ1rY\nZmGbbWg4a8650Winfy3rP4ZsNNqZuVdTU4ttbNxsm5quLHgfkbRkMmnXRyJ2COwQ2PWRiP7eFIEf\nO+fG1HwHS/FQ8F66ZDJpO6NR2xmN2mQyOW+wzX2fC/IJC9tsXd0a29/fn/lZNNppo9FO29TUMud6\nq1Z9zDY1tdj6+g9n/pGA1daYs21TU4v+Z5
 
SCOqNROzTzl8kOge2MRoNuVugVCt6apFOh8q1hvuHC\nrYt6765dA7Mm5ExPDzMxMUpz8+w1Txoa7mDFih2cOpV+505OnEhx6NDlgAEeBv4AaMDab3LoEHR0\naGamSEXIF9FL8UA970VLJpN2U+O6Ob2YlqamvGmT/v5+u2rVx2x9/Tn2Ix85zxrT6J+TtNBpYZvd\nvPli29i42U+jJP3LJiykUyud/vEhC2syvXb3fOHevojSJqWBet7hkB5EXJE6B/j1rJ+tWbuWkZGB\nrMWlhnnuuee4++6vAw8A8Ktf3QFcBNwKfAD4JgBHjtwO3AxcjOuR3wb8d+As4M9g1pJUnwCeAVYA\nEeAh4FxAvW0pLD1LN73657BW/ywp1XlXmJkVAc9lNR08wMxaI7f19TExcZBjx94GTrJ27Xp+/OMf\n8847A8wE353AI8BG/3l2UN4DXIJLifwQ+BZwGJceecA/bwdwE7AfF/y/kXXdOPAw/f130dfXV5oP\nQERm0aqCoRPjt4xwKwnWNL7FbT3buffeb2dmTLpgug14GniKmSD9DC4oj+a55jrgfOBR4EJcbzr9\nvruB94DP4oL5OcBXsn4OLvjfzMTEQRS7RYK1rOBtjPk68AXgd8AR4EZr7W+L0bBalbsi4KnIMf7q\n8cfYtWvAD9zZwXQU12O+AxfMLwZe8X/WnXXuYVzA/jiuV/4t//j1wO8Bp4D/h0uh/NJ//W6e1l3i\n3+Nny/49C9GmGyKLlC8RvtgH8Hmgzn/+NWBgnnNLntivFtnlfOkBHzfYOHvg0A0yDlnYZlesWJcZ\nsGxoWOcfT1hjVlnILvs7y8KVFvr9AcmN/uBko/+zpP96bc771lpIzFtbXozfWwNeIrNRYMCyaDlv\nY8wfA13W2usL/NwW615htNwe5datn+HQoZ+SHoCcyUH/NXAdTU3PsXbtGsCtIJhegOrYsbfnrHHi\n/p19g5k8dy9wHS7lc
 
gbwon+fp4AfUVdnOOOMOi688CIGBnYtue3ZqxvOtyCWdvMRmascOe+bgCeK\neL2qka9me6nrZQ8MfIX29muYmnoIV4WSwgXb6zDmEZ5/PoW16wD44Q//J5dcsi0TzGc7DBzHBe7s\nFMzXgHeAf/OPnwtMAvczPQ2p1E5efPGFpf3izN1PUzv4iBTHgj1vY8wYkL2SvwEs0Getfco/pw/Y\naq3tmuc6NdvzPt0eZW6PFci8tva3PP30QeCDuGBcz0xPOl0WCC7HbXD/toIbjPwks8sDdzK74uQO\nXIngtczkt88HniEa3cCBA08u+ncvtJ9mvmtoE2SRuU67522tjS5w4RuAq3FlCvPavXt35nlrayut\nra0LvaVmFeqxHjjwJJ7ncfXVXwTux/Wk04OQccDDBefHgSlmBie34wYi/5zZVSb478/tifcwe3Bz\nJ7AW2FDk33SG6oRFYHx8nPHx8YVPzJcIX+wDuAqXIF2ziHNLltCvdKczEDffOiazf9bpDzymZ1Sm\nZ2Bum/N+2JB1LJk1YPmpPOdunHPMmLOXPIC4lMW0RGQuSjTD8kGgARgzxgA8a639L8u8ZtUpTY/y\nMNAFvABciht0PB9Ir2mSr877FK4HnfYabsAT3IBl2k7g9+e8+9JLP73kdqf308yeFaretMjyLSt4\nW2s/UayGVLtYLLakoJVb711Xt4OWFrebe0vLVsbG0lPi23E57ijwk6wrdDM7IPcAJ4GzcSmUVbgg\nn25THPgSrmT/Jv96M2mUhoY7GRh4bNHtz7bU311EFqYZlhUqFovR13cb99yTYHr6E0xP38TevQ/S\n3NzslwHOzlE3Nu5h06ZNvPjinUxNpY+exE3O+Z3/OBv3R/5tZue9n8FN7nkPtz/Hd3CTca7DBf0N\nwPuZe803kKq9MUXKQ8G7gk1MHGR6epB0kE2lLs4EyVyXXXZJZjDz2mtv4fjxdbjKzRgwTH19gpMn\n
 
/w235gn+8dtw9dzZg5Jx3EDlLmATbuAzxtTUTOojeyB1YuJ64H2mptw1VAooUh4K3iGUm1KJRHpJ\nJIYB12Pv6bnRX2nwTVxh4u2cPDkN/GfgWdziUwB/z0yVStqof6wHV044OwgPDu6ftVa46+U/xMw/\nMO4cBW+R0lLwriC56YhCQXqhQcAnn/yB/+yhrKufRV3df2N6Ol1euANX/13IBtwgqFNXt4NE4nvs\n2jWwzN9SRIpBS8JWiNy67kikl5ER15teaj55zZotHD+evSLgMHA3TU2beO21/8uJExaX2/5X/+ff\n9v+7E2gBfgR8FLgGOAi8QVPTiqxZnvWkp+nX1yeoqzuVSZuk262et0hxFJqko510KsRi96dcjHx7\nUxpztu3v7/cXreqysDqzeJVbkOpjFj4w72JUM22c2aEnvTlx7kJaIlIcaCed2jEwsIv29uszVSfG\n3MGePTuZmDjoLyu7h9nVKhezYsWdTE9HsPZ+cqtYHn/c9aRnBktjpAdC164dVSmgSAAUvCvEfIOQ\nSxWLxRgdfSwr3fIEsViMiYmCS8/4X83mTsy57LJLMoG5mG0UkeVRzruCLHbp1OVc3+XVPwOMkb2Q\n1ebNGzly5A9wMy5d3r2ubgf/8A/fm9WOUrdRRGYrlPNW8K4x6eB79OjLvPXWO6xcuZKenhtpbm72\nA7tb17uu7lXuvXeH9qoUCZiCtyxIvWqRyqPgLSISQoWCd10QjRERqVae59HV1kZXWxue55XsPup5\ni4gUSSl2g1LaRESkxEqxibbSJiIiVUSTdEREiqQ7kSA+OQnZaZNEoiT3UtpERKSIPM/LbHnYXYQt\nD5XzFhEJIeW8RUSqiIK3SMiUq45YKpvSJiIhUoo6YqlsynmLVIFS1BFLZVPOW0SkiqjOWyREyllH\nLJVNaRORkCl2HbFUNuW8RURCSDlvEZEqouAtIhJCCt4iIiGk4C0iEkIK3iIiI
 
aTgLSISQgreIiIh\npOAtIhJCCt4iIiFUlOBtjEkYY6aNMY3FuJ6IiMxv2cHbGLMRiAKvL785IiKlUW2bWCx7bRNjzPeB\ne4FR4DJr7fEC52ltExEJRJg3sSjJ2ibGmHbg59baw8u5johIKe0fHGRfKkUciAP7UqnMyoxhteB6\n3saYMWB99iHAAncDX8alTLJ/VtDu3bszz1tbW2ltbV18S0VEasD4+Djj4+MLnnfaaRNjzKeAp4H3\ncEF7I/BL4HJr7Vt5zlfaREQCUY1pk6Kt522M+Rmw1Vr7mwI/V/AWKRFt0LCwsH5G5QjeR4FmDViK\nlFeYe5WyMO2kI1KltKN8ddNOOiIiVUS7x4uEnHaUr01Km4hUgbAOxsnClPMWqSIK1rVDwVukSqi6\npLYoeItUCVWX1JaqrTaptpXCREQWI9TVJrlfH+OTk/r6KFVP1SUCIU+b6Ouj1CoNWNaOQmmTUPe8\nRWpVLBZTwK5xoQ7e+vooIrUq1GkT0NdHkWqm/79VKigiIaN6dkfBW0RCRQUJTtXWeS/FYrYWqhX6\nLGYU+ixqcQ6B/l7MqPTPQsG7RumzmJHvs0h/ZW8fG3O9v46OmgjglfT3ojuRcKkSXK+7NxKhu4wF\nCZX0WeQT6moTkVLJ3m0cAH+38VrLtwYpFosxPDKSGbAcrtEBy0IUvEWkYqmevbCyDliW5UYiIlUm\n0GoTEREpnpoasBQRqRYK3iIiIVSzwdsYkzDGTBtjGoNuS1CMMV83xrxkjHneGPOkMWZ10G0qN2PM\nVcaYl40xrxhjeoNuT1CMMRuNMT8yxrxojDlsjLk96DYFzRhTZ4w5aIwZDbot+dRk8DbGbASiwOtB\ntyVgB4CLrLWXAq8CuwJuT1kZY+qAPwdiwEXAF40xFwbbqsCcBHqstRcBVwC31PBnkbYd+KegG1FI\nTQZv4H7gzqAbETRr7dPW2mn/5bPAxiDbE4DLgVetta9b
 
a98HngD+KOA2BcJa+6a19nn/+TvAS8BH\ng21VcPwO3tXAd4JuSyE1F7yNMe3Az621h4NuS4W5CfhB0I0os48CP896/QtqOGClGWPOAy4FfhJs\nSwKV7uBVbDleVU7SMcaMAeuzD+H+EO4GvoxLmWT/rGrN81n0WWuf8s/pA9631j4eQBOlghhjzgT+\nBtju98BrjjHmD4F/sdY+b4xppUJjRFUGb2ttNN9xY8yngPOAF4wxBpcm+N/GmMuttW+VsYllU+iz\nSDPG3ID7evjZsjSosvwS+HjW643+sZpkjKnHBe7HrLV/F3R7AnQl0G6MuRqIAKuMMd+11v5pwO2a\npaYn6RhjfgZstdb+Jui2BMEYcxUwCPx7a+3bQben3IwxK4CfAp8DfgX8L+CL1tqXAm1YQIwx3wWO\nWWt7gm5LpTDGtAAJa2170G3JVXM57xyWCv1KVCYPAmcCY35J1F8G3aBystaeAm7FVd28CDxRw4H7\nSuBPgM8aYw75fx+uCrpdUlhN97xFRMKq1nveIiKhpOAtIhJCCt4iIiGk4C0iEkIK3iIiIaTgLSIS\nQgreIiIhpOAtIhJC/x+QMzAQu1m4swAAAABJRU5ErkJggg==\n",
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x10a034250>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Generate train data\n",
+    "X = 0.3 * np.random.randn(100, 2)\n",
+    "X_train = np.r_[X + 2, X - 2]\n",
+    "X_train_D = pd.DataFrame(X_train, columns=['x1', 'x2'])\n",
+    "\n",
+    "# Generate some abnormal novel observations\n",
+    "X_outliers = np.random.uniform(low=-7, high=7, size=(40, 2))\n",
+    "X_outliers_D = pd.DataFrame(X_outliers, columns=['x1', 'x2'])\n",
+    "\n",
+    "b = plt.scatter(X_train[:, 0], X_train[:, 1], c='blue')\n",
+    "c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red')\n",
+    "plt.axis('tight')\n",
+    "plt.xlim((-5, 5))\n",
+    "plt.ylim((-5, 5))\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "Done.\n",
+      "200 rows affected.\n",
+      "Done.\n",
+      "Done.\n",
+      "40 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Build tables\n",
+    "%sql DROP TABLE IF EXISTS X_train_D CASCADE\n",
+    "%sql PERSIST X_train_D\n",
+    "%sql ALTER TABLE X_train_D add column X float[]\n",
+    "%sql update X_train_D set X = array[x1, x2]::float[]\n",
+    "\n",
+    "%sql DROP TABLE IF EXISTS X_outliers_D CASCADE\n",
+    "%sql PERSIST X_outliers_D\n",
+    "%sql ALTER TABLE X_outliers_D add column X float[]\n",
+    "%sql update X_outliers_D set X = array[x1, x2]::float[]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>coef</th>\n",
+       "        <th>loss</th>\n",
+       "        <th>norm_of_gradient</th>\n",
+       "        <th>num_iterations</th>\n",
+       "        <th>num_rows_processed</th>\n",
+       "        <th>num_rows_skipped</th>\n",
+       "        <th>dep_var_mapping</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>[-0.0652579039751, -0.132031595902, -0.736674560761, 
0.110776065553, -0.257405638109, 0.0592959032279, 0.394264859682, 
-0.696044045897, 0.355378680096, -0.917519476201, 0.499471522957, 
-0.050984665751, -0.134700659181, -0.0705623616132, -0.0440350483425, 
-0.730837500515, -0.0321151797135, -0.893641152253, -0.686424263168, 
-0.568143960852, -0.12323421025, -0.136522678866, -0.626743403999, 
-0.256609467659, -0.285826104108, -0.117860781456, 0.318213569126, 
0.465683621798, 0.569073368875, -0.319718151386, -0.032926380078, 
0.330495474386, 0.0426841287321, 0.621705430279, 0.444381604771, 
0.347966836495, 0.0573812330683, -0.103244905478, -0.575045339435, 
-0.101783824252, 0.624406852044, 0.113168204922, 0.506933705459, 
0.560256594355, 0.142564738705, 0.338469597153, -0.0531770790698, 
0.107924238733, 0.665718502542, 0.712455036796, -0.265273701697, 
-0.133616355952, 0.413526223018, -0.277088845804, -0.49433371065, 
-1.00004534127]</td>\n",
+       "        <td>73.1866371844</td>\n",
+       "        <td>113.184300907</td>\n",
+       "        <td>100</td>\n",
+       "        <td>201</td>\n",
+       "        <td>-1</td>\n",
+       "        <td>[-1.0, 1.0]</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[([-0.0652579039751, -0.132031595902, -0.736674560761, 0.110776065553, 
-0.257405638109, 0.0592959032279, 0.394264859682, -0.696044045897, 
0.355378680096, -0.917519476201, 0.499471522957, -0.050984665751, 
-0.134700659181, -0.0705623616132, -0.0440350483425, -0.730837500515, 
-0.0321151797135, -0.893641152253, -0.686424263168, -0.568143960852, 
-0.12323421025, -0.136522678866, -0.626743403999, -0.256609467659, 
-0.285826104108, -0.117860781456, 0.318213569126, 0.465683621798, 
0.569073368875, -0.319718151386, -0.032926380078, 0.330495474386, 
0.0426841287321, 0.621705430279, 0.444381604771, 0.347966836495, 
0.0573812330683, -0.103244905478, -0.575045339435, -0.101783824252, 
0.624406852044, 0.113168204922, 0.506933705459, 0.560256594355, 0.142564738705, 
0.338469597153, -0.0531770790698, 0.107924238733, 0.665718502542, 
0.712455036796, -0.265273701697, -0.133616355952, 0.413526223018, 
-0.277088845804, -0.49433371065, -1.00004534127], 73.1866371844, 
113.184300907067, 100, 201L, -1L, [-1
 .0, 1.0])]"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql \n",
+    "-- Train the model\n",
+    "DROP TABLE IF EXISTS svm_out1, svm_out1_summary, svm_out1_random 
CASCADE;\n",
+    "SELECT madlib.svm_one_class(\n",
+    "    'X_train_D',    -- source table\n",
+    "    'svm_out1',     -- output table\n",
+    "    'X',            -- features\n",
+    "    'gaussian',     -- kernel\n",
+    "    'gamma=1, n_components=55, random_state=3', \n",
+    "    NULL,           -- grouping \n",
+    "    'init_stepsize=0.1, lambda=10, max_iter=100, tolerance=0'  \n",
+    "    );\n",
+    "SELECT * FROM svm_out1; "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "200 rows affected.\n",
+      "Done.\n",
+      "1 rows affected.\n",
+      "40 rows affected.\n",
+      "20 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>index</th>\n",
+       "        <th>prediction</th>\n",
+       "        <th>decision_function</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-0.185553475523</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-0.998404919902</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-0.338487093573</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-0.307355005857</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>4</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-1.07966449719</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>5</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-1.97175772304</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>6</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-2.20381705572</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>7</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-2.06660552658</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>8</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-1.44928341418</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>9</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-0.418765699618</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>10</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-0.624364032639</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>11</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-0.368526211192</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>12</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-0.304497177977</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>13</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-0.0421685610513</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>14</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-1.01890271929</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>15</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-0.669735104545</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>16</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-0.947241377137</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>17</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-0.576672641354</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>18</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-0.550855208608</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>19</td>\n",
+       "        <td>-1.0</td>\n",
+       "        <td>-0.315200233978</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(0L, -1.0, -0.185553475522983),\n",
+       " (1L, -1.0, -0.998404919902401),\n",
+       " (2L, -1.0, -0.338487093572938),\n",
+       " (3L, -1.0, -0.307355005857271),\n",
+       " (4L, -1.0, -1.07966449719332),\n",
+       " (5L, -1.0, -1.97175772303719),\n",
+       " (6L, -1.0, -2.20381705571656),\n",
+       " (7L, -1.0, -2.06660552658075),\n",
+       " (8L, -1.0, -1.44928341418456),\n",
+       " (9L, -1.0, -0.418765699617653),\n",
+       " (10L, -1.0, -0.624364032638881),\n",
+       " (11L, -1.0, -0.368526211192035),\n",
+       " (12L, -1.0, -0.304497177977408),\n",
+       " (13L, -1.0, -0.0421685610512577),\n",
+       " (14L, -1.0, -1.01890271928657),\n",
+       " (15L, -1.0, -0.669735104544719),\n",
+       " (16L, -1.0, -0.94724137713711),\n",
+       " (17L, -1.0, -0.576672641354374),\n",
+       " (18L, -1.0, -0.550855208607514),\n",
+       " (19L, -1.0, -0.315200233978155)]"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Prediction\n",
+    "# First for the training data\n",
+    "%sql drop table if exists y_pred_train;\n",
+    "%sql SELECT madlib.svm_predict('svm_out1', 'X_train_D', 'index', 
'y_pred_train');\n",
+    "y_pred_train = %sql SELECT * from y_pred_train; \n",
+    "\n",
+    "# Next for the outliers\n",
+    "%sql drop table if exists y_pred_outliers;\n",
+    "%sql SELECT madlib.svm_predict('svm_out1', 'X_outliers_D', 'index', 
'y_pred_outliers');\n",
+    "y_pred_outliers = %sql SELECT * from y_pred_outliers; \n",
+    "\n",
+    "%sql SELECT * FROM y_pred_outliers limit 20; -- Show the outliers\n",
+    "#%sql SELECT * FROM y_pred_train limit 20; -- Show the training data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "Done.\n",
+      "10000 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Predict over the decision grid for plotting\n",
+    "# xx, yy = np.meshgrid(np.linspace(-7, 7, 500), np.linspace(-7, 7, 
500))\n",
+    "xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))\n",
+    "grid_points = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=['x1', 
'x2'])\n",
+    "\n",
+    "%sql DROP TABLE IF EXISTS grid_points CASCADE\n",
+    "%sql PERSIST grid_points\n",
+    "%sql ALTER TABLE grid_points add column X float[]\n",
+    "%sql update grid_points set X = array[x1, x2]::float[]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "10000 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAW8AAAEKCAYAAADdBdT9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFNZJREFUeJzt3X+wHWV9x/HPJ6QoGsKAv2oT4AZCtRMHGdoyQ5gBwVIi\nqTjT1grVGqvVPzSF8qsU0CGjpSrViUWGmVraGWKHhoqjRKwVHDM4NQ2oCCKgJJCEyK9UoMZIRUK+\n/WP3kM3NOfeee8+e3X1236+ZM+zZs2f32eXOJ9999jm7jggBANIyp+4GAABmjvAGgAQR3gCQIMIb\nABJEeANAgghvAEgQ4Y0k2D7F9va621EF24fb3mnbdbcFzUV4Y0Zsb7X9pO2DCvPeZ3t9BZt/8UcJ\ntrfYPm02K7F9pO09eUDutP247XW2f28G67jC9prZbL/PuvbZl4jYHhHzgx9hYAqEN2YqlP3d/FWf\n+SkJSYdExHxJb5T0DUlfsv3uepsFDIfwxmz8vaQLbc/v96HtpbbvtP2M7Ttsn5jP/xPb35m07Pm2\nv5xPH2j7U7a35dXwtbZf0mf9ayQdIemWvHK+yPYttj80abl7bL9tiv2wJEXEjoi4WtIqSZ8sfP+1\ntm+yvcP2Q7b/Mp9/hqTLJL3D9s9tfz+fP9/2dbYfs73d9seKXR+232/7/rzNP7R9XGFfvlLYl96Z\nwZxCO262/ZTtB23/RWGdV9i+0fb1+ffvtX38FPuMtogIXryGfknaIuk0STdJ+lg+732SvplPHyrp\naUl/qqw4ODt/f6ikgyT9TNLRhfXdKent+fRqSV+WdIikl0u6WdKV+WenSHpkUjtOLbx/u6SNhfdv\nlPQ/kub22YcjJb0gac6k+Ysk7ZH0OmXB/l1Jl0s6QNKEpM2STs+XvULSmknf/5KkayW9VNIrJW2U\n9P5C+7ZLOj5/f5Skwwfsyz7tk/QtSZ+V9Gv5fu2Q9
 
KZCO56VdEbe5r+T9N91/53wGv+LyhuzdYWk\nlbZfMWn+ckkPRsQNEbEnItZK+pGkt0bE/0laJ+kcSbJ9jLKgXJd/9/2Szo+In0XELyR9orfsAMUL\neuskHWP76Pz9uyTdGBG7Z7BPj+X/PUzS70p6ZURcGREvRMRWSdcp+8do/4bYr5b0lrz9v4yIn0r6\nTGH590m6KiLukqSIeDgiihdg+16ctH24pBMlXRIRz0fEPXk7it07/xURX4+IkPR5ScfOYJ+RqLl1\nNwBpioj7bN8i6VJJDxQ++g1J2yYtvk3Sgnz6BkmfkvS3yqrzL0fEc7ZfJellkr5X6GmYowGh1qc9\nz9m+UdK7bH9UWej/0Qx3a4GyvvCnlQXgAttP5585b8+3Bnz3SGWV8eN5+52/Hsk/P1zSQzNsjyS9\nVtLTEfFsYd42Sb9deP9EYfpZSS+1PSci9sxie0gE4Y1RrJJ0l6RPF+Y9pv1D8whJX8unb5P0Kttv\nVFaV9i58/lRZ8CyJiMeH2Ha/C6RrlFWe35b0i4i4Y4j1FP2hpB0R8WPbh0p6OCJeN+T2t0v6paRX\n5BXwZNslHd1nfr91FT0m6TDbL8/PRqTseD46xXfQAXSbYNYi4iFJN0o6tzD7P5R1X5xt+wDb75D0\nW5Juyb+zW9IXlF30PFRZmCsPvH+S9Jm8CpftBbZ/f8Dmn1DWb1xsz0ZlfdafVhbiU+lVxrL9atsr\nJX1E0t/kn98p6ee2/9r2S/N9WWL7d/LPn5Q00bsgGRFPSLpV0mrbBztzlO2T8+Wvk3RR72Ki7aPz\nLpHeuvbZF+29mPoTSRskfdz2S2wfq6wLZqr9Y3x4BxDemKnJVeJHlXV3hCRFxNOS/kDSRcqq6Ysk\nLc/n9/ybpDdL+vdJp/aXKLsouNH2/yoLw98c0I5PSPqI7adtX1CYv0bSGyT96xD78Yztn0v6gaRl\nkv44Iq7P92NPvh/HKbuguEPZ
 
Py69ETZfUBaST9n+bj5vhaQDJd2vrOvlC5J+PV/fTZKulHSD7Z3K\nLm4eln/v4332pXicz1F2MfUxSV+U9JGImGpcfWrDNjEL7n+GB6TJ9p8pG+Fx8rQLAwmj8kZr2H6Z\npA9K+se62wKMG+GNVsj7xndIelxZtwzQanSbAECCqLwBIEGVjfO2TYkPALMQEfsN/6z2RzqrKt3a\n/tZLOrXmNjQFx2IvjsVeHIu9mnIsVvWfTbcJACSI8AaABHUrvCfqbkCDTNTdgAaZqLsBDTJRdwMa\nZKLuBkytW+G9qO4GNAjHYi+OxV4ci70afiy6Fd4A0BKENwAkiPAGgAQR3gCQIMIbABJEeANAgghv\nAEgQ4Q0ACSK8ASBB1d5VcBw2S/M2ZJO7lkpaXGtrAKASaYf3Zmn+Wunq3dnbcx+Rdp4tAhxA6yUd\n3vM2ZMG9ojdjt7Ryg7SL8AbQcqX1edueY/su2+vKWicAoL8yK+/zJN0vaX6J65zSrqVZV4l63SZz\n835vAGi5UsLb9kJJZ0q6UtIFZaxzKIuzPu6VXLBEG3DxHTNQVuW9WtLFkg4paX3DW0wfN1qAi++Y\noZHD2/ZySU9GxN223yRpv6ccv2h9YXpCjb/ZOVAVLr7jRVskbZ1+sTIq75MknWX7TEkHSTrY9pqI\nePd+SzbhScwA0GSLtG9he3v/xUYO74i4TNJlkmT7FEkX9g1uAANx8R0zlfQ4b6A1uPiOGSo1vCPi\ndg0s8gFMiYvvmAFuTAUACSK8ASBBhDcAJIjwBoAEEd4AkCCGCgLAOI3pnjWENwCMyxjvWUN4A8CY\njPOeNfR5A0CCqLwBYEzGec8awhsAxmWM96whvAFgnMZ0zxr6vAEgQVTeQEfEMdl/vammBvCMzlIR\n3kDieqFc1vJjCXee0Vk6whtIzEzDetT1lxHmPKOzfIQ3kIBxB/Yw266tuwV9Ed5Ag9UZ2pONEuI8\no7N8hDf
 
QME0K7H5mFeI8o7N0hDfQIE0P7qIZhzjP6CwV4Q00QEqhPRl94vXgRzpAzVIO7qK27Ecq\nCG+gRm0LvLbtT5MR3kBN2hp0cUx7961JCG8AY0GAjxfhDdSgK8HWlf2sA+ENYKwI8PFgqCDSwB3p\nkhbHMJSwbFTeaL78jnTXPJy95q/N5qWqq5VoV/d7XKi80Xjcka49qMDLQ+UNAAmi8kbjcUe6dqH6\nLocjopoN2aFVlWwKbdSyC5b0/xLgQ1slRYQnz6byRhq4I13rUIGPhj5vAEjQyOFte6Htb9q+z/a9\nts8to2FAm1FxZug+mr0yKu/dki6IiCWSTpT0IduvL2G9ADqAAJ+dkcM7Ip6IiLvz6V2SHpC0YNT1\nAm1H9Y1RlNrnbXtC0nGS7ihzvQDajep75koLb9vzJN0k6by8AgcwDapvzFYpQwVtz1UW3J+PiJsH\nLri+MD0haVEZWwfQBgwdzG2RtHX6xcoa5/0vku6PiH+YcqlTS9oa0CLeRLcBChZp38L29v6LlTFU\n8CRJ75R0mu3v277L9rJR1wsAGGzkyjsivi3pgBLaAnQW1XeGrpPh8QtLAEgQ4Q00BBUnZoLwBoAE\nEd5Ag1B9Y1iEN9AwBDiGQXgDaAz+4Roe4Q00ECGG6RDeQEN1LcC7tr+jIryBButKoHVlP8tEeANA\ngghvoOHaXJV6U7v3b5wIbyABbQy4Nu5Tlcq6JSxStVmatyGb3LVU0uJaW4MptOXmVYR2OQjvLtss\nzV8rXb07e3vuI9LOs0WAN1jKAU5ol4vw7rB5G7LgXtGbsVtauUHaRXg3Wi8EUwhxAnt8CG8gUU2t\nwgnsahDeHbZradZVol63ydy83xvJaEIVTljXg/DussVZH/dKLlgmrxig4w5ywroZCO+uW0wfd9tM\nDtfZhDkB3XyEN9ByBHE78SMdAEgQ4Q0ACSK8ASBBhDcAJIjwBoAEEd4AkCDCGwASRHgD
 
QIIIbwBI\nEOENAAkivAEgQYQ3ACSI8AaABBHeAJAgwhsAEkR4A0CCSglv28ts/8j2g7YvKWOdAIDBRg5v23Mk\nXSPpDElLJJ1j+/WjrhcAMFgZlfcJkjZFxLaIeF7SWklvK2G9AIABygjvBZK2F97/JJ8HABiTah9A\nvL4wPSFpUaVbB4Dm2yJp6/SLlRHej0o6ovB+YT5vf6eWsDUAaLNF2rewvb3/YmV0m3xH0mLbR9o+\nUNLZktaVsF4AwAAjV94R8YLtlZJuVfaPwT9HxAMjtwwAMFApfd4R8Z+SXlfGugAA06v2gmWXbZbm\nbcgmdy2VtLjW1gBIHOFdhc3S/LXS1buzt+c+Iu08WwQ4gFkjvCswb0MW3Ct6M3ZLKzdIuwhvALPE\njakAIEFU3hXYtTTrKlGv22Ru3u8NALNEeFdhcdbHvZILlgBKQnhXZTF93ADKQ3h3XBwz9efeVE07\nAMwM4d0x04X1VMsT5EBzEN4dMdPQnmodhDhQP8K75coI7UHrJMSxD35FXCnCu6XGEdr9tkGAQxK/\nIq4B4d0yVYT25O0R4OBXxNVLJ7w5JZtS1aE9edsEOFCtNMKbU7KB6gztIgK82/gVcfWSCG9Oyfpr\nSnD3EOAdxq+IK5dEeGNfTQvtIgK8w/gVcaWSCG9OyfYaZ3B//QfS576aTX9guXTGsbNbDwEOjF8S\n4c0p2fir7a//QFqxWvrkr7L3Kx6Urj9/9gEOYLzSCG+p06dkVXSTfO6rWXC/eF3hV9k8qm+gmXgY\nQ8M1uX8bQH3Sqbw7purQ/sDyrKtEebfJJQdK1y8fbZ1U38D4EN4NVEe1fcaxWR9374Ll9SNcsAQw\nfo6IajZkh1ZVsqmktbGbhOobGMEqKSI8eTZ93g3SxuAGMB6Ed0MQ3ABmgvBugLYHd9v3D6gDFyxr\nRrChVbj7Z2UIbwDl4O6flSK8azRd1V3WvUaagDHf7cfdP6tFn3d
 
D9e41ctYPs9eK1dk8AJCovGsz\nXdVd9r1GgHHj7p/VIrwBlIO7f1aK8G6ocdxrBBi7Dt/9s2qEdw2GGR7YxnuNcNESKM9I4W37Kklv\nlfScpIck/XlE7CyjYcjCOvXAnjHGCQNDGXW0ya2SlkTEcZI2Sbp09Cahs/Jxwtc8nL3mr83mAdjf\nSOEdEd+IiD35242SFo7epJbaLM1bk70Y8tdfcZzwCmXTvSocwL7KHOf9XklfK3F97TGpomTMNoBR\nTdvnbfs2Sa8pzpIUki6PiK/ky1wu6fmIuGEsrUzcfr88Y8x2X4wTBoY3bXhHxOlTfW77PZLOlHTa\ntFtbX5iekLRo2m+gSxgnDEhbJG2dfrFRR5ssk3SxpJMj4rlpv3DqKFtL1+SKkjHbU2CcMLpukfYt\nbG/vv9hIj0GzvUnSgZKeymdtjIgPDli2249BmzQELt5Sb3PqwjhvYIZW9X8M2kiVdwR3ox4aFSWA\nEnFXQQBIEOFdE7oPAIyC8AaABBHeNepa9d21/QXGifAGgAQR3jWjGgUwG4R3A3QhwLuwj0CVCG8A\nSBDh3RBtrkzbvG9AXQjvBmljyLVxn4AmILwbpk1h16Z9AZqG8G6gNoReG/YBaDKeHt9Q3jTcU+ab\niOBGp1X0EG0q7wZLMQRTbDNQmgofok3l3XC9MGx6FU5oA30eebg7ezLUOG4HTeWdiKaGozc1t21A\nm1F5J6QpVThhDfRX5UO0Ce8E1RHiBDYwhAofok14J6wYqGUHOWENzFJFjzwkvFuiX9gOG+gENZAe\nwrvFCOWWqmgcMZqN8AZSko8jvrp3QeyRrI+VAO8ewhtISJXjiNFsjPMGgARReQMJqXIcMZqN8AZS\nUuE4YjQb4Q2kpqJxxGg2+rwBIEGENwAkiPAGgAQR3gCQIMIbABJEeANAgghvAEgQ4Q0ACSK8ASBB\npYS37Qtt77F9WBnrAwBMbeSfx9teKOl0Sd
 
tGbw4AjEnLHmJRRuW9WtLFJawHAMYjf4jFNQ9nr/lr\ns3kpG6nytn2WpO0Rca/tkpoEAOVq40Mspg1v27dJek1xlqSQ9GFJlynrMil+Ntj6wvSEpEVDthIA\numKLpK3TLzZteEfE6f3m236Dsgi+x1nZvVDS92yfEBE7+q7s1OkbBABlS+ohFou0b2F7e//FHBGl\nbM/2FknHR8QzAz4PrSplUwAma9nFuLFI9RitkiJiv16NMh/GEJqu2wRA+Xii/HBa9hCL0sI7Io4q\na10AhtfGi3GYHr+wBIAE8QxLIHFJXYxDaQhvIHU8Ub6TCG8gRX1GTtDH3S2EN5AaRpdAhDeQHEaX\nQGpDeKc68B4ARpB2eHP6iA5idAmkxMOb00d0EqNLoMTDG+gsRpd0XtLhzekjgK5KOrw5fQRajgEJ\nA6Ud3hKnj0BbMSBhSumHN4BWYkDC1Lp1V8EtdTegQTgWew06FpuleWuyV+oPqx0afxd7NfxYdKvy\n3iqem9mzVRyLnq3a/1h09ZR9qxrzd1H7gIStasyx6Kdb4Q0MiVP2BmBAwpQIbwDNxYCEgUp7APG0\nG7Kr2RAAtEy/BxBXFt4AgPJ0a7QJALQE4Q0ACepseNu+0PYe24fV3Za62L7K9gO277b9Rdvz625T\n1Wwvs/0j2w/avqTu9tTF9kLb37R9n+17bZ9bd5vqZnuO7btsr6u7Lf10MrxtL5R0uqRtdbelZrdK\nWhIRx0naJOnSmttTKdtzJF0j6QxJSySdY/v19baqNrslXRARSySdKOlDHT4WPedJur/uRgzSyfCW\ntFrSxXU3om4R8Y2I2JO/3ShpYZ3tqcEJkjZFxLaIeF7SWklvq7lNtYiIJyLi7nx6l6QHJC2ot1X1\nyQu8MyVdV3dbBulceNs+S9L2iLi37rY0zHslfa3uRlRsgaTthfc/UYcDq8f2hKTjJN1Rb0tq1Svw\nGjscr5U/0rF9m6TXF
 
Gcp+5/wYUmXKesyKX7WWlMci8sj4iv5MpdLej4ibqihiWgQ2/Mk3STpvLwC\n7xzbyyU9GRF3236TGpoRrQzviDi933zbb5A0Ieke21bWTfA92ydExI4Km1iZQceix/Z7lJ0enlZJ\ng5rlUUlHFN4vzOd1ku25yoL78xFxc93tqdFJks6yfaakgyQdbHtNRLy75nbto9M/0rG9RdLxEfFM\n3W2pg+1lkj4t6eSIeKru9lTN9gGSfizpzZIel3SnpHMi4oFaG1YT22sk/TQiLqi7LU1h+xRJF0bE\nWXW3ZbLO9XlPEmroKVFFPitpnqTb8iFR19bdoCpFxAuSViobdXOfpLUdDu6TJL1T0mm2v5//PSyr\nu10YrNOVNwCkquuVNwAkifAGgAQR3gCQIMIbABJEeANAgghvAEgQ4Q0ACSK8ASBB/w+3pZ0Fhfm4\n5gAAAABJRU5ErkJggg==\n",
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x10a969f50>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Plot the decision grid\n",
+    "%sql drop table if exists Z_D;\n",
+    "%sql SELECT madlib.svm_predict('svm_out1', 'grid_points', 'index', 
'Z_D');\n",
+    "Z_D = %sql SELECT decision_function from Z_D order by index\n",
+    "Z = np.array(Z_D)\n",
+    "Z = Z.reshape(xx.shape)\n",
+    "\n",
+    "# Orange is not novel, green is novel\n",
+    "plt.title(\"Novelty Detection\")\n",
+    "plt.contourf(xx, yy, Z, levels=[0, Z.max()], colors='orange')\n",
+    "plt.contourf(xx, yy, Z, levels=[Z.min(), 0], colors='green')\n",
+    "#plt.contourf(xx, yy, Z, levels=np.linspace(Z.min(), Z.max(), 7), 
cmap=plt.cm.Blues_r)\n",
+    "#b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='blue')\n",
+    "c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red')\n",
+    "plt.axis('tight')\n",
+    "plt.xlim((-5, 5))\n",
+    "plt.ylim((-5, 5))\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}

http://git-wip-us.apache.org/repos/asf/incubator-madlib-site/blob/86869127/community-artifacts/Path-demo-4.ipynb
----------------------------------------------------------------------
diff --git a/community-artifacts/Path-demo-4.ipynb 
b/community-artifacts/Path-demo-4.ipynb
new file mode 100644
index 0000000..57cbae3
--- /dev/null
+++ b/community-artifacts/Path-demo-4.ipynb
@@ -0,0 +1,908 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Path function\n",
+    "\n",
+    "The goal of the MADlib path function is to perform regular pattern 
matching over a sequence of rows, and to extract useful information about the 
pattern matches. The useful information could be a simple count of matches or 
something more involved like aggregations or window functions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "%load_ext sql"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "u'Connected: fmcquillan@madlib'"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# %sql postgresql://gpdbchina@10.194.10.68:55000/madlib\n",
+    "%sql postgresql://fmcquillan@localhost:5432/madlib"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%sql select madlib.version();"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The data set describes shopper behavior on a notional web site that sells 
beer and wine. A beacon fires an event to a log file when the shopper visits 
different pages on the site: landing page, beer selection page, wine selection 
page, and checkout. Other pages on the site like help pages show up in the logs 
as well. Let’s assume that the log has been sessionized."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "Done.\n",
+      "34 rows affected.\n",
+      "34 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>event_timestamp</th>\n",
+       "        <th>user_id</th>\n",
+       "        <th>session_id</th>\n",
+       "        <th>page</th>\n",
+       "        <th>revenue</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:03:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>100</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:04:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>100</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:05:00</td>\n",
+       "        <td>102201</td>\n",
+       "        <td>106</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:05:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>100</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>39.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:06:00</td>\n",
+       "        <td>102201</td>\n",
+       "        <td>106</td>\n",
+       "        <td>HELP</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:09:00</td>\n",
+       "        <td>102201</td>\n",
+       "        <td>106</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:15:00</td>\n",
+       "        <td>101121</td>\n",
+       "        <td>102</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:16:00</td>\n",
+       "        <td>101121</td>\n",
+       "        <td>102</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:17:00</td>\n",
+       "        <td>101121</td>\n",
+       "        <td>102</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>15.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:18:00</td>\n",
+       "        <td>101121</td>\n",
+       "        <td>102</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:19:00</td>\n",
+       "        <td>101121</td>\n",
+       "        <td>102</td>\n",
+       "        <td>HELP</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:21:00</td>\n",
+       "        <td>101121</td>\n",
+       "        <td>102</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:22:00</td>\n",
+       "        <td>101121</td>\n",
+       "        <td>102</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>23.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:06:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>101</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:09:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>101</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:15:00</td>\n",
+       "        <td>102201</td>\n",
+       "        <td>107</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:15:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>103</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:16:00</td>\n",
+       "        <td>102201</td>\n",
+       "        <td>107</td>\n",
+       "        <td>BEER</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:16:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>103</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:17:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>103</td>\n",
+       "        <td>HELP</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:17:00</td>\n",
+       "        <td>103711</td>\n",
+       "        <td>109</td>\n",
+       "        <td>BEER</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:17:00</td>\n",
+       "        <td>102201</td>\n",
+       "        <td>107</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:18:00</td>\n",
+       "        <td>102871</td>\n",
+       "        <td>108</td>\n",
+       "        <td>BEER</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:18:00</td>\n",
+       "        <td>103711</td>\n",
+       "        <td>109</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:18:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>103</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:19:00</td>\n",
+       "        <td>103711</td>\n",
+       "        <td>109</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:19:00</td>\n",
+       "        <td>101331</td>\n",
+       "        <td>103</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>16.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:19:00</td>\n",
+       "        <td>102871</td>\n",
+       "        <td>108</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:22:00</td>\n",
+       "        <td>101443</td>\n",
+       "        <td>104</td>\n",
+       "        <td>BEER</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:22:00</td>\n",
+       "        <td>102871</td>\n",
+       "        <td>108</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>21.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:25:00</td>\n",
+       "        <td>102871</td>\n",
+       "        <td>108</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:25:00</td>\n",
+       "        <td>101443</td>\n",
+       "        <td>104</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>12.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:29:00</td>\n",
+       "        <td>101881</td>\n",
+       "        <td>105</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 02:30:00</td>\n",
+       "        <td>101881</td>\n",
+       "        <td>105</td>\n",
+       "        <td>BEER</td>\n",
+       "        <td>0.0</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(datetime.datetime(2015, 4, 15, 1, 3), 100821, 100, u'LANDING', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 4), 100821, 100, u'WINE', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 5), 102201, 106, u'LANDING', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 5), 100821, 100, u'CHECKOUT', 
39.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 6), 102201, 106, u'HELP', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 9), 102201, 106, u'LANDING', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 15), 101121, 102, u'LANDING', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 16), 101121, 102, u'WINE', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 17), 101121, 102, u'CHECKOUT', 
15.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 18), 101121, 102, u'LANDING', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 19), 101121, 102, u'HELP', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 21), 101121, 102, u'WINE', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 22), 101121, 102, u'CHECKOUT', 
23.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 6), 100821, 101, u'WINE', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 9), 100821, 101, u'WINE', 0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 15), 102201, 107, u'WINE', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 15), 101331, 103, u'LANDING', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 16), 102201, 107, u'BEER', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 16), 101331, 103, u'WINE', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 17), 101331, 103, u'HELP', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 17), 103711, 109, u'BEER', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 17), 102201, 107, u'WINE', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 18), 102871, 108, u'BEER', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 18), 103711, 109, u'LANDING', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 18), 101331, 103, u'WINE', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 19), 103711, 109, u'WINE', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 19), 101331, 103, u'CHECKOUT', 
16.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 19), 102871, 108, u'WINE', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 22), 101443, 104, u'BEER', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 22), 102871, 108, u'CHECKOUT', 
21.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 25), 102871, 108, u'LANDING', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 25), 101443, 104, u'CHECKOUT', 
12.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 29), 101881, 105, u'LANDING', 
0.0),\n",
+       " (datetime.datetime(2015, 4, 15, 2, 30), 101881, 105, u'BEER', 0.0)]"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql \n",
+    "DROP TABLE IF EXISTS eventlog, path_output, path_output_tuples 
CASCADE;\n",
+    "CREATE TABLE eventlog (event_timestamp TIMESTAMP,\n",
+    "            user_id INT,\n",
+    "            session_id INT,\n",
+    "            page TEXT,\n",
+    "            revenue FLOAT);\n",
+    "INSERT INTO eventlog VALUES\n",
+    "('04/15/2015 01:03:00', 100821, 100, 'LANDING', 0),\n",
+    "('04/15/2015 01:04:00', 100821, 100, 'WINE', 0),\n",
+    "('04/15/2015 01:05:00', 100821, 100, 'CHECKOUT', 39),\n",
+    "('04/15/2015 02:06:00', 100821, 101, 'WINE', 0),\n",
+    "('04/15/2015 02:09:00', 100821, 101, 'WINE', 0),\n",
+    "('04/15/2015 01:15:00', 101121, 102, 'LANDING', 0),\n",
+    "('04/15/2015 01:16:00', 101121, 102, 'WINE', 0),\n",
+    "('04/15/2015 01:17:00', 101121, 102, 'CHECKOUT', 15),\n",
+    "('04/15/2015 01:18:00', 101121, 102, 'LANDING', 0),\n",
+    "('04/15/2015 01:19:00', 101121, 102, 'HELP', 0),\n",
+    "('04/15/2015 01:21:00', 101121, 102, 'WINE', 0),\n",
+    "('04/15/2015 01:22:00', 101121, 102, 'CHECKOUT', 23),\n",
+    "('04/15/2015 02:15:00', 101331, 103, 'LANDING', 0),\n",
+    "('04/15/2015 02:16:00', 101331, 103, 'WINE', 0),\n",
+    "('04/15/2015 02:17:00', 101331, 103, 'HELP', 0),\n",
+    "('04/15/2015 02:18:00', 101331, 103, 'WINE', 0),\n",
+    "('04/15/2015 02:19:00', 101331, 103, 'CHECKOUT', 16),\n",
+    "('04/15/2015 02:22:00', 101443, 104, 'BEER', 0),\n",
+    "('04/15/2015 02:25:00', 101443, 104, 'CHECKOUT', 12),\n",
+    "('04/15/2015 02:29:00', 101881, 105, 'LANDING', 0),\n",
+    "('04/15/2015 02:30:00', 101881, 105, 'BEER', 0),\n",
+    "('04/15/2015 01:05:00', 102201, 106, 'LANDING', 0),\n",
+    "('04/15/2015 01:06:00', 102201, 106, 'HELP', 0),\n",
+    "('04/15/2015 01:09:00', 102201, 106, 'LANDING', 0),\n",
+    "('04/15/2015 02:15:00', 102201, 107, 'WINE', 0),\n",
+    "('04/15/2015 02:16:00', 102201, 107, 'BEER', 0),\n",
+    "('04/15/2015 02:17:00', 102201, 107, 'WINE', 0),\n",
+    "('04/15/2015 02:18:00', 102871, 108, 'BEER', 0),\n",
+    "('04/15/2015 02:19:00', 102871, 108, 'WINE', 0),\n",
+    "('04/15/2015 02:22:00', 102871, 108, 'CHECKOUT', 21),\n",
+    "('04/15/2015 02:25:00', 102871, 108, 'LANDING', 0),\n",
+    "('04/15/2015 02:17:00', 103711, 109, 'BEER', 0),\n",
+    "('04/15/2015 02:18:00', 103711, 109, 'LANDING', 0),\n",
+    "('04/15/2015 02:19:00', 103711, 109, 'WINE', 0);\n",
+    "\n",
+    "SELECT * FROM eventlog ORDER BY event_timestamp ASC;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Calculate the revenue by checkout:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "%%sql\n",
+    "SELECT madlib.path(\n",
+    "     'eventlog',                -- Name of input table\n",
+    "     'path_output',             -- Table name to store path results\n",
+    "     'session_id',              -- Partition input table by session\n",
+    "     'event_timestamp ASC',     -- Order partitions in input table by 
time\n",
+    "     'buy:=page=''CHECKOUT''',  -- Define a symbol for checkout events\n",
+    "     '(buy)',                   -- Pattern search: purchase\n",
+    "     'sum(revenue) as checkout_rev',    -- Aggregate:  sum revenue by 
checkout\n",
+    "     TRUE                       -- Persist matches\n",
+    "     );\n",
+    "SELECT * FROM path_output ORDER BY session_id, match_id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that there are 2 checkouts within session 102, which is apparent 
from the 'match_id' column. This serves to illustrate that the 'aggregate_func' 
operates on a per pattern match basis, not on a per partition basis. If in fact 
we wanted revenue by partition ('session_id' in this example), then we could 
do:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%%sql\n",
+    "SELECT session_id, sum(checkout_rev) FROM path_output GROUP BY session_id 
ORDER BY session_id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Since we set TRUE for 'persist_rows', we can view the associated pattern 
matches:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%%sql\n",
+    "SELECT * FROM path_output_tuples ORDER BY session_id ASC, event_timestamp 
ASC;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Notice that the 'symbol' and 'match_id' columns are added to the right of 
the matched rows.\n",
+    "\n",
+    "We are interested in sessions with an order placed within 4 pages of 
entering the shopping site via the landing page. We represent this by the 
regular expression: '(land)[^(land)(buy)]{0,2}(buy)'. In other words, visit to 
the landing page followed by from 0 to 2 non-entry, non-sale pages, followed by 
a purchase. The SQL is as follows:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "3 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>session_id</th>\n",
+       "        <th>match_id</th>\n",
+       "        <th>checkout_rev</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>100</td>\n",
+       "        <td>1.0</td>\n",
+       "        <td>39.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>102</td>\n",
+       "        <td>1.0</td>\n",
+       "        <td>15.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>102</td>\n",
+       "        <td>2.0</td>\n",
+       "        <td>23.0</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(100, 1.0, 39.0), (102, 1.0, 15.0), (102, 2.0, 23.0)]"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS path_output, path_output_tuples;\n",
+    "SELECT madlib.path(\n",
+    "     'eventlog',                -- Name of input table\n",
+    "     'path_output',             -- Table name to store path results\n",
+    "     'session_id',              -- Partition input table by session\n",
+    "     'event_timestamp ASC',     -- Order partitions in input table by 
time\n",
+    "      $$ land:=page='LANDING',\n",
+    "        wine:=page='WINE',\n",
+    "        beer:=page='BEER',\n",
+    "        buy:=page='CHECKOUT',\n",
+    "        other:=page<>'LANDING' AND page<>'WINE' AND page<>'BEER' AND  
page<>'CHECKOUT'\n",
+    "        $$,                     -- Symbols for page types\n",
+    "\n",
+    "      '(land)[^(land)(buy)]{0,2}(buy)', -- Purchase within 4 pages 
entering site\n",
+    "     'sum(revenue) as checkout_rev',    -- Aggregate:  sum revenue by 
checkout\n",
+    "     TRUE                       -- Persist matches\n",
+    "     );\n",
+    "SELECT * FROM path_output ORDER BY session_id, match_id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now view the associated pattern matches:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "10 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>event_timestamp</th>\n",
+       "        <th>user_id</th>\n",
+       "        <th>session_id</th>\n",
+       "        <th>page</th>\n",
+       "        <th>revenue</th>\n",
+       "        <th>symbol</th>\n",
+       "        <th>match_id</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:03:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>100</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>land</td>\n",
+       "        <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:04:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>100</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>wine</td>\n",
+       "        <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:05:00</td>\n",
+       "        <td>100821</td>\n",
+       "        <td>100</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>39.0</td>\n",
+       "        <td>buy</td>\n",
+       "        <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:15:00</td>\n",
+       "        <td>101121</td>\n",
+       "        <td>102</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>land</td>\n",
+       "        <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:16:00</td>\n",
+       "        <td>101121</td>\n",
+       "        <td>102</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>wine</td>\n",
+       "        <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:17:00</td>\n",
+       "        <td>101121</td>\n",
+       "        <td>102</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>15.0</td>\n",
+       "        <td>buy</td>\n",
+       "        <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:18:00</td>\n",
+       "        <td>101121</td>\n",
+       "        <td>102</td>\n",
+       "        <td>LANDING</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>land</td>\n",
+       "        <td>2.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:19:00</td>\n",
+       "        <td>101121</td>\n",
+       "        <td>102</td>\n",
+       "        <td>HELP</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>other</td>\n",
+       "        <td>2.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:21:00</td>\n",
+       "        <td>101121</td>\n",
+       "        <td>102</td>\n",
+       "        <td>WINE</td>\n",
+       "        <td>0.0</td>\n",
+       "        <td>wine</td>\n",
+       "        <td>2.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2015-04-15 01:22:00</td>\n",
+       "        <td>101121</td>\n",
+       "        <td>102</td>\n",
+       "        <td>CHECKOUT</td>\n",
+       "        <td>23.0</td>\n",
+       "        <td>buy</td>\n",
+       "        <td>2.0</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(datetime.datetime(2015, 4, 15, 1, 3), 100821, 100, u'LANDING', 0.0, 
u'land', 1.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 4), 100821, 100, u'WINE', 0.0, 
u'wine', 1.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 5), 100821, 100, u'CHECKOUT', 
39.0, u'buy', 1.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 15), 101121, 102, u'LANDING', 0.0, 
u'land', 1.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 16), 101121, 102, u'WINE', 0.0, 
u'wine', 1.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 17), 101121, 102, u'CHECKOUT', 
15.0, u'buy', 1.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 18), 101121, 102, u'LANDING', 0.0, 
u'land', 2.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 19), 101121, 102, u'HELP', 0.0, 
u'other', 2.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 21), 101121, 102, u'WINE', 0.0, 
u'wine', 2.0),\n",
+       " (datetime.datetime(2015, 4, 15, 1, 22), 101121, 102, u'CHECKOUT', 
23.0, u'buy', 2.0)]"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "SELECT * FROM path_output_tuples ORDER BY session_id ASC, event_timestamp 
ASC;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For instances where a purchase is made within 4 pages of entering a site, 
compute the elasped time to checkout:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS path_output, path_output_tuples;\n",
+    "SELECT madlib.path(\n",
+    "     'eventlog',                -- Name of input table\n",
+    "     'path_output',             -- Table name to store path results\n",
+    "     'session_id',              -- Partition input table by session\n",
+    "     'event_timestamp ASC',     -- Order partitions in input table by 
time\n",
+    "      $$ land:=page='LANDING',\n",
+    "        wine:=page='WINE',\n",
+    "        beer:=page='BEER',\n",
+    "        buy:=page='CHECKOUT',\n",
+    "        other:=page<>'LANDING' AND page<>'WINE' AND page<>'BEER' AND  
page<>'CHECKOUT'\n",
+    "        $$,                     -- Symbols for page types\n",
+    "      '(land)[^(land)(buy)]{0,2}(buy)', -- Purchase within 4 pages 
entering site\n",
+    "     '(max(event_timestamp)-min(event_timestamp)) as elapsed_time',    -- 
Aggregate: elapsed time\n",
+    "     TRUE                       -- Persist matches\n",
+    "     );\n",
+    "SELECT * FROM path_output ORDER BY session_id, match_id;\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We may want to use a window function instead of an aggregate. You can 
write window functions on the output tuples to achieve the desired result.   
Continuing the previous example, let’s say we want to compute average revenue 
for checkouts within 4 pages of entering the shopping site via the landing 
page:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%%sql\n",
+    "SELECT DATE(event_timestamp), user_id, session_id, revenue,\n",
+    "    avg(revenue) OVER (PARTITION BY DATE(event_timestamp)) as 
avg_checkout_rev\n",
+    "    FROM path_output_tuples\n",
+    "    WHERE page='CHECKOUT'\n",
+    "    ORDER BY user_id, session_id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we want to do a golden path analysis to find the most successful 
shopper paths through the site. Since our data set is small, we decide this 
means the most frequently viewed page just before a checkout is made:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS path_output, path_output_tuples;\n",
+    "SELECT madlib.path(\n",
+    "     'eventlog',                -- Name of input table\n",
+    "     'path_output',             -- Table name to store path results\n",
+    "     'session_id',              -- Partition input table by session\n",
+    "     'event_timestamp ASC',     -- Order partitions in input table by 
time\n",
+    "      $$ land:=page='LANDING',\n",
+    "        wine:=page='WINE',\n",
+    "        beer:=page='BEER',\n",
+    "        buy:=page='CHECKOUT',\n",
+    "        other:=page<>'LANDING' AND page<>'WINE' AND page<>'BEER' AND  
page<>'CHECKOUT'\n",
+    "        $$,                     -- Symbols for page types\n",
+    "      '[^(buy)](buy)',          -- Pattern to match\n",
+    "     'array_agg(page ORDER BY session_id ASC, event_timestamp ASC) as 
page_path');\n",
+    "     \n",
+    "SELECT count(*), page_path from\n",
+    "    (SELECT * FROM path_output) q\n",
+    "GROUP BY page_path\n",
+    "ORDER BY count(*) DESC\n",
+    "LIMIT 10;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "There are only 2 different paths. The wine page is viewed more frequently 
than the beer page just before checkout.\n",
+    "\n",
+    "To demonstrate the use of 'overlapping_patterns', consider a pattern with 
at least one page followed by and ending with a checkout:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS path_output, path_output_tuples;\n",
+    "SELECT madlib.path(                                                       
            \n",
+    "     'eventlog',                    -- Name of the table                  
                         \n",
+    "     'path_output',                 -- Table name to store the path 
results                         \n",
+    "     'session_id',                  -- Partition by session               
  \n",
+    "     'event_timestamp ASC',         -- Order partitions in input table by 
time       \n",
+    "     $$ nobuy:=page<>'CHECKOUT',\n",
+    "        buy:=page='CHECKOUT'\n",
+    "     $$,  -- Definition of symbols used in the pattern definition \n",
+    "     '(nobuy)+(buy)',         -- At least one page followed by and ending 
with a CHECKOUT.\n",
+    "     'array_agg(page ORDER BY session_id ASC, event_timestamp ASC) as 
page_path',  \n",
+    "     FALSE,                        -- Don't persist matches\n",
+    "     TRUE                          -- Turn on overlapping patterns\n",
+    "     );\n",
+    "SELECT * FROM path_output ORDER BY session_id, match_id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "With overlap turned off, the result is:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS path_output, path_output_tuples;\n",
+    "SELECT madlib.path(                                                       
            \n",
+    "     'eventlog',                    -- Name of the table                  
                         \n",
+    "     'path_output',                 -- Table name to store the path 
results                         \n",
+    "     'session_id',                  -- Partition by session               
  \n",
+    "     'event_timestamp ASC',         -- Order partitions in input table by 
time       \n",
+    "     $$ nobuy:=page<>'CHECKOUT',\n",
+    "        buy:=page='CHECKOUT'\n",
+    "     $$,  -- Definition of symbols used in the pattern definition \n",
+    "     '(nobuy)+(buy)',         -- At least one page followed by and ending 
with a CHECKOUT.\n",
+    "     'array_agg(page ORDER BY session_id ASC, event_timestamp ASC) as 
page_path',  \n",
+    "     FALSE,                        -- Don't persist matches\n",
+    "     FALSE                          -- Turn on overlapping patterns\n",
+    "     );\n",
+    "SELECT * FROM path_output ORDER BY session_id, match_id;"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}

Reply via email to