[11/15] madlib-site git commit: jupyter notebooks for 1.14 release
http://git-wip-us.apache.org/repos/asf/madlib-site/blob/418f361c/community-artifacts/Encoding-categorical-variables-v2.ipynb -- diff --git a/community-artifacts/Encoding-categorical-variables-v2.ipynb b/community-artifacts/Encoding-categorical-variables-v2.ipynb new file mode 100644 index 000..5e4cb6f --- /dev/null +++ b/community-artifacts/Encoding-categorical-variables-v2.ipynb @@ -0,0 +1,4026 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ +"# Encoding categorical variables\n", +"This is the new module that replaces create_indicator_variables() which was deprecated as of MADlib v1.10" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ +{ + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated. You should import from traitlets.config instead.\n", + " \"You should import from traitlets.config instead.\", ShimWarning)\n", + "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.\n", + " warn(\"IPython.utils.traitlets has moved to a top-level traitlets package.\")\n" + ] +} + ], + "source": [ +"%load_ext sql" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ +{ + "data": { + "text/plain": [ + "u'Connected: gpadmin@madlib'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" +} + ], + "source": [ +"# Greenplum Database 5.4.0 on GCP (demo machine)\n", +"%sql postgresql://gpadmin@35.184.253.255:5432/madlib\n", +"\n", +"# PostgreSQL local\n", +"#%sql postgresql://fmcquillan@localhost:5432/madlib\n", +"\n", +"# Greenplum Database 4.3.10.0\n", +"#%sql postgresql://gpdbchina@10.194.10.68:61000/madlib" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ +{ + "name": "stdout", + "output_type": "stream", + "text": [ + "1 rows affected.\n" + ] +}, +{ + "data": { + "text/html": [ + "\n", + "\n", + "version\n", + "\n", + "\n", + "MADlib version: 1.14-dev, git revision: rc/1.13-rc1-21-g3af2d70, cmake configuration time: Mon Feb 26 18:00:54 UTC 2018, build type: release, build system: Linux-2.6.32-696.20.1.el6.x86_64, C compiler: gcc 4.4.7, C++ compiler: g++ 4.4.7\n", + "\n", + "" + ], + "text/plain": [ + "[(u'MADlib version: 1.14-dev, git revision: rc/1.13-rc1-21-g3af2d70, cmake configuration time: Mon Feb 26 18:00:54 UTC 2018, build type: release, build system: Linux-2.6.32-696.20.1.el6.x86_64, C compiler: gcc 4.4.7, C++ compiler: g++ 4.4.7',)]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" +} + ], + "source": [ +"%sql select madlib.version();\n", +"#%sql select version();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ +"## 1. Load data set\n", +"Use a subset of the abalone dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ +{ + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "Done.\n", + "20 rows affected.\n", + "20 rows affected.\n" + ] +}, +{ + "data": { + "text/html": [ + "\n", + "\n", + "id\n", + "sex\n", + "length\n", + "diameter\n", + "height\n", + "rings\n", + "\n", + "\n", + "1\n", + "M\n", + "0.455\n", + "0.365\n", + "0.095\n", + "15\n", + "\n", + "\n", + "2\n", + "M\n", + "0.35\n", + "0.265\n", + "0.09\n", + "7\n", + "\n", + "\n", + "3\n", + "F\n", + "0.53\n", + "0.42\n", + "0.135\n", + "9\n", + "\n", + "\n", + "4\n", + "M\n", + "0.44\n", + "0.365\n", + "0.125\n", + "10\n", + "\n", + "\n", + "5\n", + "I\n", + "0.33\n", + "0.255\n", + "0.08\n", + "7\n", + "\n", + "\n", +
[11/15] madlib-site git commit: jupyter notebooks for 1.14 release
http://git-wip-us.apache.org/repos/asf/madlib-site/blob/3f849b9e/community-artifacts/Encoding-categorical-variables-v2.ipynb -- diff --git a/community-artifacts/Encoding-categorical-variables-v2.ipynb b/community-artifacts/Encoding-categorical-variables-v2.ipynb new file mode 100644 index 000..5e4cb6f --- /dev/null +++ b/community-artifacts/Encoding-categorical-variables-v2.ipynb @@ -0,0 +1,4026 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ +"# Encoding categorical variables\n", +"This is the new module that replaces create_indicator_variables() which was deprecated as of MADlib v1.10" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ +{ + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated. You should import from traitlets.config instead.\n", + " \"You should import from traitlets.config instead.\", ShimWarning)\n", + "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.\n", + " warn(\"IPython.utils.traitlets has moved to a top-level traitlets package.\")\n" + ] +} + ], + "source": [ +"%load_ext sql" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ +{ + "data": { + "text/plain": [ + "u'Connected: gpadmin@madlib'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" +} + ], + "source": [ +"# Greenplum Database 5.4.0 on GCP (demo machine)\n", +"%sql postgresql://gpadmin@35.184.253.255:5432/madlib\n", +"\n", +"# PostgreSQL local\n", +"#%sql postgresql://fmcquillan@localhost:5432/madlib\n", +"\n", +"# Greenplum Database 4.3.10.0\n", +"#%sql postgresql://gpdbchina@10.194.10.68:61000/madlib" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ +{ + "name": "stdout", + "output_type": "stream", + "text": [ + "1 rows affected.\n" + ] +}, +{ + "data": { + "text/html": [ + "\n", + "\n", + "version\n", + "\n", + "\n", + "MADlib version: 1.14-dev, git revision: rc/1.13-rc1-21-g3af2d70, cmake configuration time: Mon Feb 26 18:00:54 UTC 2018, build type: release, build system: Linux-2.6.32-696.20.1.el6.x86_64, C compiler: gcc 4.4.7, C++ compiler: g++ 4.4.7\n", + "\n", + "" + ], + "text/plain": [ + "[(u'MADlib version: 1.14-dev, git revision: rc/1.13-rc1-21-g3af2d70, cmake configuration time: Mon Feb 26 18:00:54 UTC 2018, build type: release, build system: Linux-2.6.32-696.20.1.el6.x86_64, C compiler: gcc 4.4.7, C++ compiler: g++ 4.4.7',)]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" +} + ], + "source": [ +"%sql select madlib.version();\n", +"#%sql select version();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ +"## 1. Load data set\n", +"Use a subset of the abalone dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ +{ + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "Done.\n", + "20 rows affected.\n", + "20 rows affected.\n" + ] +}, +{ + "data": { + "text/html": [ + "\n", + "\n", + "id\n", + "sex\n", + "length\n", + "diameter\n", + "height\n", + "rings\n", + "\n", + "\n", + "1\n", + "M\n", + "0.455\n", + "0.365\n", + "0.095\n", + "15\n", + "\n", + "\n", + "2\n", + "M\n", + "0.35\n", + "0.265\n", + "0.09\n", + "7\n", + "\n", + "\n", + "3\n", + "F\n", + "0.53\n", + "0.42\n", + "0.135\n", + "9\n", + "\n", + "\n", + "4\n", + "M\n", + "0.44\n", + "0.365\n", + "0.125\n", + "10\n", + "\n", + "\n", + "5\n", + "I\n", + "0.33\n", + "0.255\n", + "0.08\n", + "7\n", + "\n", + "\n", +