Repository: madlib-site
Updated Branches:
  refs/heads/asf-site 5fa1ac070 -> acd339f65


http://git-wip-us.apache.org/repos/asf/madlib-site/blob/acd339f6/community-artifacts/stratified-sampling-v1.ipynb
----------------------------------------------------------------------
diff --git a/community-artifacts/stratified-sampling-v1.ipynb 
b/community-artifacts/stratified-sampling-v1.ipynb
deleted file mode 100644
index 75e02fd..0000000
--- a/community-artifacts/stratified-sampling-v1.ipynb
+++ /dev/null
@@ -1,672 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Stratified sampling\n",
-    "Stratified sampling is a method for sampling subpopulations (strata) 
independently. It is commonly used to reduce sampling error by ensuring that 
subgroups are adequately represented in the sample.\n",
-    "\n",
-    "Stratified sampling was added in MADlib 1.12."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The sql extension is already loaded. To reload it, use:\n",
-      "  %reload_ext sql\n"
-     ]
-    }
-   ],
-   "source": [
-    "%load_ext sql"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "u'Connected: gpdbchina@madlib'"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Greenplum 4.3.10.0\n",
-    "%sql postgresql://gpdbchina@10.194.10.68:61000/madlib\n",
-    "        \n",
-    "# PostgreSQL local\n",
-    "#%sql postgresql://fmcquillan@localhost:5432/madlib\n",
-    "\n",
-    "# Greenplum 4.2.3.0\n",
-    "#%sql postgresql://gpdbchina@10.194.10.68:55000/madlib"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>version</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>MADlib version: 1.12-dev, git revision: 
rel/v1.11-23-gfdf7b6d, cmake configuration time: Wed Jun 28 18:06:35 UTC 2017, 
build type: Release, build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C 
compiler: gcc 4.4.0, C++ compiler: g++ 4.4.0</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(u'MADlib version: 1.12-dev, git revision: rel/v1.11-23-gfdf7b6d, 
cmake configuration time: Wed Jun 28 18:06:35 UTC 2017, build type: Release, 
build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, 
C++ compiler: g++ 4.4.0',)]"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%sql select madlib.version();\n",
-    "#%sql select version();"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 1.  Create input table"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "Done.\n",
-      "25 rows affected.\n",
-      "25 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>id1</th>\n",
-       "        <th>id2</th>\n",
-       "        <th>gr1</th>\n",
-       "        <th>gr2</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>3</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>4</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>5</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>6</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>7</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>8</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>0</td>\n",
-       "        <td>2</td>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>0</td>\n",
-       "        <td>3</td>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>0</td>\n",
-       "        <td>4</td>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>0</td>\n",
-       "        <td>5</td>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>0</td>\n",
-       "        <td>6</td>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>10</td>\n",
-       "        <td>10</td>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>20</td>\n",
-       "        <td>20</td>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>30</td>\n",
-       "        <td>30</td>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>40</td>\n",
-       "        <td>40</td>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>50</td>\n",
-       "        <td>50</td>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>60</td>\n",
-       "        <td>60</td>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>70</td>\n",
-       "        <td>70</td>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, 0, 1, 1),\n",
-       " (2, 0, 1, 1),\n",
-       " (3, 0, 1, 1),\n",
-       " (4, 0, 1, 1),\n",
-       " (5, 0, 1, 1),\n",
-       " (6, 0, 1, 1),\n",
-       " (7, 0, 1, 1),\n",
-       " (8, 0, 1, 1),\n",
-       " (9, 0, 1, 1),\n",
-       " (9, 0, 1, 1),\n",
-       " (9, 0, 1, 1),\n",
-       " (9, 0, 1, 1),\n",
-       " (0, 1, 1, 2),\n",
-       " (0, 2, 1, 2),\n",
-       " (0, 3, 1, 2),\n",
-       " (0, 4, 1, 2),\n",
-       " (0, 5, 1, 2),\n",
-       " (0, 6, 1, 2),\n",
-       " (10, 10, 2, 2),\n",
-       " (20, 20, 2, 2),\n",
-       " (30, 30, 2, 2),\n",
-       " (40, 40, 2, 2),\n",
-       " (50, 50, 2, 2),\n",
-       " (60, 60, 2, 2),\n",
-       " (70, 70, 2, 2)]"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql \n",
-    "DROP TABLE IF EXISTS test;\n",
-    "\n",
-    "CREATE TABLE test(\n",
-    "    id1 INTEGER,\n",
-    "    id2 INTEGER,\n",
-    "    gr1 INTEGER,\n",
-    "    gr2 INTEGER\n",
-    ");\n",
-    "\n",
-    "INSERT INTO test VALUES\n",
-    "(1,0,1,1),\n",
-    "(2,0,1,1),\n",
-    "(3,0,1,1),\n",
-    "(4,0,1,1),\n",
-    "(5,0,1,1),\n",
-    "(6,0,1,1),\n",
-    "(7,0,1,1),\n",
-    "(8,0,1,1),\n",
-    "(9,0,1,1),\n",
-    "(9,0,1,1),\n",
-    "(9,0,1,1),\n",
-    "(9,0,1,1),\n",
-    "(0,1,1,2),\n",
-    "(0,2,1,2),\n",
-    "(0,3,1,2),\n",
-    "(0,4,1,2),\n",
-    "(0,5,1,2),\n",
-    "(0,6,1,2),\n",
-    "(10,10,2,2),\n",
-    "(20,20,2,2),\n",
-    "(30,30,2,2),\n",
-    "(40,40,2,2),\n",
-    "(50,50,2,2),\n",
-    "(60,60,2,2),\n",
-    "(70,70,2,2);\n",
-    "\n",
-    "SELECT * FROM test ORDER BY gr1, gr2, id1, id2;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 2.  Sample without replacement"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "1 rows affected.\n",
-      "13 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>gr1</th>\n",
-       "        <th>gr2</th>\n",
-       "        <th>id1</th>\n",
-       "        <th>id2</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>3</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>6</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "        <td>0</td>\n",
-       "        <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "        <td>0</td>\n",
-       "        <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "        <td>0</td>\n",
-       "        <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "        <td>30</td>\n",
-       "        <td>30</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "        <td>40</td>\n",
-       "        <td>40</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "        <td>50</td>\n",
-       "        <td>50</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "        <td>70</td>\n",
-       "        <td>70</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, 1, 1, 0),\n",
-       " (1, 1, 2, 0),\n",
-       " (1, 1, 3, 0),\n",
-       " (1, 1, 6, 0),\n",
-       " (1, 1, 9, 0),\n",
-       " (1, 1, 9, 0),\n",
-       " (1, 2, 0, 1),\n",
-       " (1, 2, 0, 2),\n",
-       " (1, 2, 0, 5),\n",
-       " (2, 2, 30, 30),\n",
-       " (2, 2, 40, 40),\n",
-       " (2, 2, 50, 50),\n",
-       " (2, 2, 70, 70)]"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS out;\n",
-    "\n",
-    "SELECT madlib.stratified_sample(\n",
-    "                                'test',    -- Source table\n",
-    "                                'out',     -- Output table\n",
-    "                                0.5,       -- Sample proportion\n",
-    "                                'gr1,gr2', -- Strata definition\n",
-    "                                'id1,id2', -- Columns to output\n",
-    "                                FALSE);    -- Sample without 
replacement\n",
-    "\n",
-    "SELECT * FROM out ORDER BY gr1,gr2,id1,id2;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 3.  Sample with replacement"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 71,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Done.\n",
-      "1 rows affected.\n",
-      "13 rows affected.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table>\n",
-       "    <tr>\n",
-       "        <th>gr1</th>\n",
-       "        <th>gr2</th>\n",
-       "        <th>id1</th>\n",
-       "        <th>id2</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>3</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>4</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>8</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>1</td>\n",
-       "        <td>9</td>\n",
-       "        <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "        <td>0</td>\n",
-       "        <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "        <td>0</td>\n",
-       "        <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>1</td>\n",
-       "        <td>2</td>\n",
-       "        <td>0</td>\n",
-       "        <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "        <td>20</td>\n",
-       "        <td>20</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "        <td>20</td>\n",
-       "        <td>20</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "        <td>40</td>\n",
-       "        <td>40</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "        <td>2</td>\n",
-       "        <td>2</td>\n",
-       "        <td>40</td>\n",
-       "        <td>40</td>\n",
-       "    </tr>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "[(1, 1, 1, 0),\n",
-       " (1, 1, 2, 0),\n",
-       " (1, 1, 3, 0),\n",
-       " (1, 1, 4, 0),\n",
-       " (1, 1, 8, 0),\n",
-       " (1, 1, 9, 0),\n",
-       " (1, 2, 0, 5),\n",
-       " (1, 2, 0, 5),\n",
-       " (1, 2, 0, 6),\n",
-       " (2, 2, 20, 20),\n",
-       " (2, 2, 20, 20),\n",
-       " (2, 2, 40, 40),\n",
-       " (2, 2, 40, 40)]"
-      ]
-     },
-     "execution_count": 71,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%%sql\n",
-    "DROP TABLE IF EXISTS out;\n",
-    "\n",
-    "SELECT madlib.stratified_sample(\n",
-    "                                'test',    -- Source table\n",
-    "                                'out',     -- Output table\n",
-    "                                0.5,       -- Sample proportion\n",
-    "                                'gr1,gr2', -- Strata definition\n",
-    "                                'id1,id2', -- Columns to output\n",
-    "                                TRUE);     -- Sample with replacement\n",
-    "\n",
-    "SELECT * FROM out ORDER BY gr1,gr2,id1,id2;"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}

Reply via email to