[11/15] madlib-site git commit: jupyter notebooks for 1.14 release

2018-04-23 Thread fmcquillan
http://git-wip-us.apache.org/repos/asf/madlib-site/blob/418f361c/community-artifacts/Encoding-categorical-variables-v2.ipynb
--
diff --git a/community-artifacts/Encoding-categorical-variables-v2.ipynb 
b/community-artifacts/Encoding-categorical-variables-v2.ipynb
new file mode 100644
index 000..5e4cb6f
--- /dev/null
+++ b/community-artifacts/Encoding-categorical-variables-v2.ipynb
@@ -0,0 +1,4026 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+"# Encoding categorical variables\n",
+"This is the new module that replaces create_indicator_variables(), which 
was deprecated as of MADlib v1.10."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+{
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+  
"/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: 
ShimWarning: The `IPython.config` package has been deprecated. You should 
import from traitlets.config instead.\n",
+  "  \"You should import from traitlets.config instead.\", ShimWarning)\n",
+  
"/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5:
 UserWarning: IPython.utils.traitlets has moved to a top-level traitlets 
package.\n",
+  "  warn(\"IPython.utils.traitlets has moved to a top-level traitlets 
package.\")\n"
+ ]
+}
+   ],
+   "source": [
+"%load_ext sql"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+{
+ "data": {
+  "text/plain": [
+   "u'Connected: gpadmin@madlib'"
+  ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+}
+   ],
+   "source": [
+"# Greenplum Database 5.4.0 on GCP (demo machine)\n",
+"%sql postgresql://gpadmin@35.184.253.255:5432/madlib\n",
+"\n",
+"# PostgreSQL local\n",
+"#%sql postgresql://fmcquillan@localhost:5432/madlib\n",
+"\n",
+"# Greenplum Database 4.3.10.0\n",
+"#%sql postgresql://gpdbchina@10.194.10.68:61000/madlib"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+{
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+  "1 rows affected.\n"
+ ]
+},
+{
+ "data": {
+  "text/html": [
+   "\n",
+   "\n",
+   "version\n",
+   "\n",
+   "\n",
+   "MADlib version: 1.14-dev, git revision: 
rc/1.13-rc1-21-g3af2d70, cmake configuration time: Mon Feb 26 18:00:54 UTC 
2018, build type: release, build system: Linux-2.6.32-696.20.1.el6.x86_64, C 
compiler: gcc 4.4.7, C++ compiler: g++ 4.4.7\n",
+   "\n",
+   ""
+  ],
+  "text/plain": [
+   "[(u'MADlib version: 1.14-dev, git revision: rc/1.13-rc1-21-g3af2d70, 
cmake configuration time: Mon Feb 26 18:00:54 UTC 2018, build type: release, 
build system: Linux-2.6.32-696.20.1.el6.x86_64, C compiler: gcc 4.4.7, C++ 
compiler: g++ 4.4.7',)]"
+  ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+}
+   ],
+   "source": [
+"%sql select madlib.version();\n",
+"#%sql select version();"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+"## 1.  Load data set\n",
+"Use a subset of the abalone dataset:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+{
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+  "Done.\n",
+  "Done.\n",
+  "20 rows affected.\n",
+  "20 rows affected.\n"
+ ]
+},
+{
+ "data": {
+  "text/html": [
+   "\n",
+   "\n",
+   "id\n",
+   "sex\n",
+   "length\n",
+   "diameter\n",
+   "height\n",
+   "rings\n",
+   "\n",
+   "\n",
+   "1\n",
+   "M\n",
+   "0.455\n",
+   "0.365\n",
+   "0.095\n",
+   "15\n",
+   "\n",
+   "\n",
+   "2\n",
+   "M\n",
+   "0.35\n",
+   "0.265\n",
+   "0.09\n",
+   "7\n",
+   "\n",
+   "\n",
+   "3\n",
+   "F\n",
+   "0.53\n",
+   "0.42\n",
+   "0.135\n",
+   "9\n",
+   "\n",
+   "\n",
+   "4\n",
+   "M\n",
+   "0.44\n",
+   "0.365\n",
+   "0.125\n",
+   "10\n",
+   "\n",
+   "\n",
+   "5\n",
+   "I\n",
+   "0.33\n",
+   "0.255\n",
+   "0.08\n",
+   "7\n",
+   "\n",
+   "\n",
+

[11/15] madlib-site git commit: jupyter notebooks for 1.14 release

2018-04-23 Thread fmcquillan
http://git-wip-us.apache.org/repos/asf/madlib-site/blob/3f849b9e/community-artifacts/Encoding-categorical-variables-v2.ipynb
--
diff --git a/community-artifacts/Encoding-categorical-variables-v2.ipynb 
b/community-artifacts/Encoding-categorical-variables-v2.ipynb
new file mode 100644
index 000..5e4cb6f
--- /dev/null
+++ b/community-artifacts/Encoding-categorical-variables-v2.ipynb
@@ -0,0 +1,4026 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+"# Encoding categorical variables\n",
+"This is the new module that replaces create_indicator_variables(), which 
was deprecated as of MADlib v1.10."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+{
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+  
"/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: 
ShimWarning: The `IPython.config` package has been deprecated. You should 
import from traitlets.config instead.\n",
+  "  \"You should import from traitlets.config instead.\", ShimWarning)\n",
+  
"/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5:
 UserWarning: IPython.utils.traitlets has moved to a top-level traitlets 
package.\n",
+  "  warn(\"IPython.utils.traitlets has moved to a top-level traitlets 
package.\")\n"
+ ]
+}
+   ],
+   "source": [
+"%load_ext sql"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+{
+ "data": {
+  "text/plain": [
+   "u'Connected: gpadmin@madlib'"
+  ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+}
+   ],
+   "source": [
+"# Greenplum Database 5.4.0 on GCP (demo machine)\n",
+"%sql postgresql://gpadmin@35.184.253.255:5432/madlib\n",
+"\n",
+"# PostgreSQL local\n",
+"#%sql postgresql://fmcquillan@localhost:5432/madlib\n",
+"\n",
+"# Greenplum Database 4.3.10.0\n",
+"#%sql postgresql://gpdbchina@10.194.10.68:61000/madlib"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+{
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+  "1 rows affected.\n"
+ ]
+},
+{
+ "data": {
+  "text/html": [
+   "\n",
+   "\n",
+   "version\n",
+   "\n",
+   "\n",
+   "MADlib version: 1.14-dev, git revision: 
rc/1.13-rc1-21-g3af2d70, cmake configuration time: Mon Feb 26 18:00:54 UTC 
2018, build type: release, build system: Linux-2.6.32-696.20.1.el6.x86_64, C 
compiler: gcc 4.4.7, C++ compiler: g++ 4.4.7\n",
+   "\n",
+   ""
+  ],
+  "text/plain": [
+   "[(u'MADlib version: 1.14-dev, git revision: rc/1.13-rc1-21-g3af2d70, 
cmake configuration time: Mon Feb 26 18:00:54 UTC 2018, build type: release, 
build system: Linux-2.6.32-696.20.1.el6.x86_64, C compiler: gcc 4.4.7, C++ 
compiler: g++ 4.4.7',)]"
+  ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+}
+   ],
+   "source": [
+"%sql select madlib.version();\n",
+"#%sql select version();"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+"## 1.  Load data set\n",
+"Use a subset of the abalone dataset:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+{
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+  "Done.\n",
+  "Done.\n",
+  "20 rows affected.\n",
+  "20 rows affected.\n"
+ ]
+},
+{
+ "data": {
+  "text/html": [
+   "\n",
+   "\n",
+   "id\n",
+   "sex\n",
+   "length\n",
+   "diameter\n",
+   "height\n",
+   "rings\n",
+   "\n",
+   "\n",
+   "1\n",
+   "M\n",
+   "0.455\n",
+   "0.365\n",
+   "0.095\n",
+   "15\n",
+   "\n",
+   "\n",
+   "2\n",
+   "M\n",
+   "0.35\n",
+   "0.265\n",
+   "0.09\n",
+   "7\n",
+   "\n",
+   "\n",
+   "3\n",
+   "F\n",
+   "0.53\n",
+   "0.42\n",
+   "0.135\n",
+   "9\n",
+   "\n",
+   "\n",
+   "4\n",
+   "M\n",
+   "0.44\n",
+   "0.365\n",
+   "0.125\n",
+   "10\n",
+   "\n",
+   "\n",
+   "5\n",
+   "I\n",
+   "0.33\n",
+   "0.255\n",
+   "0.08\n",
+   "7\n",
+   "\n",
+   "\n",
+