This is an automated email from the ASF dual-hosted git repository.
fmcquillan pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/madlib-site.git
The following commit(s) were added to refs/heads/asf-site by this push:
new 3a7f9ed add ipython notebook for window functions
3a7f9ed is described below
commit 3a7f9ed2e8dbaa6d0b0a406593f00b0598e1bbf0
Author: Frank McQuillan <[email protected]>
AuthorDate: Fri Apr 24 12:45:54 2020 -0700
add ipython notebook for window functions
---
.../Time-series/Window-functions-v1.ipynb | 1910 ++++++++++++++++++++
1 file changed, 1910 insertions(+)
diff --git a/community-artifacts/Time-series/Window-functions-v1.ipynb
b/community-artifacts/Time-series/Window-functions-v1.ipynb
new file mode 100644
index 0000000..9c30a40
--- /dev/null
+++ b/community-artifacts/Time-series/Window-functions-v1.ipynb
@@ -0,0 +1,1910 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Time series example - window functions\n",
+ "\n",
+ "Some example queries on time series data using aggregates and window
functions. Thanks to Divya Bhargov from VMware for this example notebook.\n",
+ "\n",
+ "Data from
https://data.cityofchicago.org/Transportation/Potholes-Patched/wqdh-9gek/data
which is loaded from CSV format.\n",
+ "\n",
+ "## Table of contents \n",
+ "\n",
+ "<a href=\"#setup\">1. Connect to database</a>\n",
+ "\n",
+ "<a href=\"#load_dataset\">2. Load data</a>\n",
+ "\n",
+ "<a href=\"#window\">3. Window functions</a>\n",
+ "\n",
+ "<a href=\"#mapping\">4. Mapping for gap filling</a>"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "<a id=\"setup\"></a>\n",
+ "## 1. Connect to database"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The sql extension is already loaded. To reload it, use:\n",
+ " %reload_ext sql\n",
+ "1 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>version</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>PostgreSQL 8.3.23 (Greenplum Database 5.18.0 build
commit:6aec9959d367d46c6b4391eb9ffc82c735d20102) on x86_64-pc-linux-gnu,
compiled by GCC gcc (GCC) 6.2.0, 64-bit compiled on Apr 3 2019
14:45:51</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(u'PostgreSQL 8.3.23 (Greenplum Database 5.18.0 build
commit:6aec9959d367d46c6b4391eb9ffc82c735d20102) on x86_64-pc-linux-gnu,
compiled by GCC gcc (GCC) 6.2.0, 64-bit compiled on Apr 3 2019 14:45:51',)]"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%load_ext sql\n",
+ "\n",
+ "# Greenplum Database 5.x on GCP (PM demo machine) - via tunnel\n",
+ "%sql postgresql://gpadmin@localhost:8000/madlib\n",
+ " \n",
+ "# PostgreSQL local\n",
+ "#%sql postgresql://fmcquillan@localhost:5432/madlib\n",
+ " \n",
+ "%sql SELECT version();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "<a id=\"load_dataset\"></a>\n",
+ "## 2. Load data\n",
+ "Load from CSV. You will need to change the path to the location of the
CSV file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Done.\n",
+ "Done.\n",
+ "65544 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "DROP TABLE IF EXISTS chicago_potholes_patched;\n",
+ "CREATE TABLE chicago_potholes_patched (\n",
+ " id serial NOT NULL,\n",
+ " address TEXT,\n",
+ " request_date TIMESTAMP,\n",
+ " completion_date TIMESTAMP,\n",
+ " number_of_potholes_filled_on_block INTEGER,\n",
+ " latitude FLOAT,\n",
+ " longitude FLOAT,\n",
+ " location TEXT\n",
+ ");\n",
+ "\n",
+ "/* COPY chicago_potholes_patched(address, request_date, completion_date,
number_of_potholes_filled_on_block, latitude, longitude, location) \n",
+ "FROM
'/Users/fmcquillan/Documents/Product/MADlib/Demos/Data/potholes_patched.csv'
DELIMITER ',' CSV HEADER;*/\n",
+ "\n",
+ "COPY chicago_potholes_patched(address, request_date, completion_date,
number_of_potholes_filled_on_block, latitude, longitude, location) \n",
+ "FROM '/home/gpadmin/data/potholes_patched.csv' DELIMITER ',' CSV HEADER;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "10 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>id</th>\n",
+ " <th>address</th>\n",
+ " <th>request_date</th>\n",
+ " <th>completion_date</th>\n",
+ " <th>number_of_potholes_filled_on_block</th>\n",
+ " <th>latitude</th>\n",
+ " <th>longitude</th>\n",
+ " <th>location</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>64544</td>\n",
+ " <td>4649 W FULLERTON AVE</td>\n",
+ " <td>2020-04-14 11:14:50</td>\n",
+ " <td>2020-04-21 15:25:34</td>\n",
+ " <td>4</td>\n",
+ " <td>41.924278082</td>\n",
+ " <td>-87.743872241</td>\n",
+ " <td>POINT (-87.743872241019 41.924278081659)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>64878</td>\n",
+ " <td>2310 N KNOX AVE</td>\n",
+ " <td>2020-04-18 12:14:13</td>\n",
+ " <td>2020-04-21 15:24:44</td>\n",
+ " <td>12</td>\n",
+ " <td>41.922840944</td>\n",
+ " <td>-87.742654834</td>\n",
+ " <td>POINT (-87.742654834295 41.922840943632)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>64459</td>\n",
+ " <td>2258 N KNOX AVE</td>\n",
+ " <td>2020-04-18 12:06:17</td>\n",
+ " <td>2020-04-21 15:23:55</td>\n",
+ " <td>15</td>\n",
+ " <td>41.922467136</td>\n",
+ " <td>-87.742610186</td>\n",
+ " <td>POINT (-87.742610186145 41.922467135904)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>64807</td>\n",
+ " <td>3958 W FULLERTON AVE</td>\n",
+ " <td>2020-04-16 12:37:36</td>\n",
+ " <td>2020-04-21 15:22:08</td>\n",
+ " <td>2</td>\n",
+ " <td>41.924817886</td>\n",
+ " <td>-87.726093348</td>\n",
+ " <td>POINT (-87.726093348147 41.924817886186)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>64812</td>\n",
+ " <td>2711 N PULASKI RD</td>\n",
+ " <td>2020-04-16 13:03:30</td>\n",
+ " <td>2020-04-21 15:20:16</td>\n",
+ " <td>2</td>\n",
+ " <td>41.930408431</td>\n",
+ " <td>-87.726595137</td>\n",
+ " <td>POINT (-87.726595137074 41.930408431011)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>64844</td>\n",
+ " <td>4444 W DEMING PL</td>\n",
+ " <td>2020-04-14 11:07:56</td>\n",
+ " <td>2020-04-21 15:18:38</td>\n",
+ " <td>1</td>\n",
+ " <td>41.92754944</td>\n",
+ " <td>-87.738454216</td>\n",
+ " <td>POINT (-87.738454216115 41.927549439867)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>64848</td>\n",
+ " <td>2536 N CENTRAL PARK AVE</td>\n",
+ " <td>2020-04-20 13:19:30</td>\n",
+ " <td>2020-04-21 15:16:36</td>\n",
+ " <td>19</td>\n",
+ " <td>41.927537543</td>\n",
+ " <td>-87.717339518</td>\n",
+ " <td>POINT (-87.717339517786 41.92753754297)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>64545</td>\n",
+ " <td>5607 S KENWOOD AVE</td>\n",
+ " <td>2020-04-21 15:08:30</td>\n",
+ " <td>2020-04-21 15:12:51</td>\n",
+ " <td>4</td>\n",
+ " <td>41.793048858</td>\n",
+ " <td>-87.592978731</td>\n",
+ " <td>POINT (-87.592978730873 41.793048857693)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>64765</td>\n",
+ " <td>8701 W FOSTER AVE</td>\n",
+ " <td>2020-04-16 18:46:19</td>\n",
+ " <td>2020-04-21 15:11:44</td>\n",
+ " <td>1</td>\n",
+ " <td>41.973190749</td>\n",
+ " <td>-87.844147025</td>\n",
+ " <td>POINT (-87.844147024715 41.97319074886)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>64892</td>\n",
+ " <td>11521 S LA SALLE ST</td>\n",
+ " <td>2020-04-19 05:14:37</td>\n",
+ " <td>2020-04-21 14:57:33</td>\n",
+ " <td>15</td>\n",
+ " <td>41.684608279</td>\n",
+ " <td>-87.626219142</td>\n",
+ " <td>POINT (-87.626219142292 41.684608279405)</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(64544, u'4649 W FULLERTON AVE', datetime.datetime(2020, 4, 14, 11,
14, 50), datetime.datetime(2020, 4, 21, 15, 25, 34), 4, 41.924278082,
-87.743872241, u'POINT (-87.743872241019 41.924278081659)'),\n",
+ " (64878, u'2310 N KNOX AVE', datetime.datetime(2020, 4, 18, 12, 14,
13), datetime.datetime(2020, 4, 21, 15, 24, 44), 12, 41.922840944,
-87.742654834, u'POINT (-87.742654834295 41.922840943632)'),\n",
+ " (64459, u'2258 N KNOX AVE', datetime.datetime(2020, 4, 18, 12, 6,
17), datetime.datetime(2020, 4, 21, 15, 23, 55), 15, 41.922467136,
-87.742610186, u'POINT (-87.742610186145 41.922467135904)'),\n",
+ " (64807, u'3958 W FULLERTON AVE', datetime.datetime(2020, 4, 16, 12,
37, 36), datetime.datetime(2020, 4, 21, 15, 22, 8), 2, 41.924817886,
-87.726093348, u'POINT (-87.726093348147 41.924817886186)'),\n",
+ " (64812, u'2711 N PULASKI RD', datetime.datetime(2020, 4, 16, 13, 3,
30), datetime.datetime(2020, 4, 21, 15, 20, 16), 2, 41.930408431,
-87.726595137, u'POINT (-87.726595137074 41.930408431011)'),\n",
+ " (64844, u'4444 W DEMING PL', datetime.datetime(2020, 4, 14, 11, 7,
56), datetime.datetime(2020, 4, 21, 15, 18, 38), 1, 41.92754944, -87.738454216,
u'POINT (-87.738454216115 41.927549439867)'),\n",
+ " (64848, u'2536 N CENTRAL PARK AVE', datetime.datetime(2020, 4, 20,
13, 19, 30), datetime.datetime(2020, 4, 21, 15, 16, 36), 19, 41.927537543,
-87.717339518, u'POINT (-87.717339517786 41.92753754297)'),\n",
+ " (64545, u'5607 S KENWOOD AVE', datetime.datetime(2020, 4, 21, 15, 8,
30), datetime.datetime(2020, 4, 21, 15, 12, 51), 4, 41.793048858,
-87.592978731, u'POINT (-87.592978730873 41.793048857693)'),\n",
+ " (64765, u'8701 W FOSTER AVE', datetime.datetime(2020, 4, 16, 18, 46,
19), datetime.datetime(2020, 4, 21, 15, 11, 44), 1, 41.973190749,
-87.844147025, u'POINT (-87.844147024715 41.97319074886)'),\n",
+ " (64892, u'11521 S LA SALLE ST', datetime.datetime(2020, 4, 19, 5, 14,
37), datetime.datetime(2020, 4, 21, 14, 57, 33), 15, 41.684608279,
-87.626219142, u'POINT (-87.626219142292 41.684608279405)')]"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM chicago_potholes_patched ORDER BY completion_date DESC
LIMIT 10;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "10 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>id</th>\n",
+ " <th>address</th>\n",
+ " <th>request_date</th>\n",
+ " <th>completion_date</th>\n",
+ " <th>number_of_potholes_filled_on_block</th>\n",
+ " <th>latitude</th>\n",
+ " <th>longitude</th>\n",
+ " <th>location</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>5505 W WINDSOR AVE</td>\n",
+ " <td>2019-04-24 14:06:04</td>\n",
+ " <td>2019-04-26 14:05:16</td>\n",
+ " <td>5</td>\n",
+ " <td>41.96306727</td>\n",
+ " <td>-87.765233061</td>\n",
+ " <td>POINT (-87.765233060697 41.963067269624)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>4</td>\n",
+ " <td>400 E WALDRON DR</td>\n",
+ " <td>2019-07-23 10:00:36</td>\n",
+ " <td>2019-07-23 16:16:48</td>\n",
+ " <td>10</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>6</td>\n",
+ " <td>2101 S CANAL ST</td>\n",
+ " <td>2019-07-23 09:57:55</td>\n",
+ " <td>2019-07-23 09:58:51</td>\n",
+ " <td>12</td>\n",
+ " <td>41.854048592</td>\n",
+ " <td>-87.638463921</td>\n",
+ " <td>POINT (-87.638463920776 41.854048592448)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>8</td>\n",
+ " <td>3651 W AINSLIE ST</td>\n",
+ " <td>2019-03-15 11:43:36</td>\n",
+ " <td>2019-04-29 14:37:02</td>\n",
+ " <td>10</td>\n",
+ " <td>41.970029</td>\n",
+ " <td>-87.7203</td>\n",
+ " <td>POINT (-87.7203 41.970029)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>10</td>\n",
+ " <td>10346 S PULASKI RD</td>\n",
+ " <td>2019-02-28 16:42:32</td>\n",
+ " <td>2019-03-05 11:14:02</td>\n",
+ " <td>1</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>12</td>\n",
+ " <td>521 E 91ST PL</td>\n",
+ " <td>2019-06-13 10:18:24</td>\n",
+ " <td>2019-07-23 13:49:35</td>\n",
+ " <td>12</td>\n",
+ " <td>41.728137874</td>\n",
+ " <td>-87.610980602</td>\n",
+ " <td>POINT (-87.610980602161 41.7281378741)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>14</td>\n",
+ " <td>801 E SOLIDARITY DR</td>\n",
+ " <td>2019-07-23 10:09:20</td>\n",
+ " <td>2019-07-23 10:10:10</td>\n",
+ " <td>20</td>\n",
+ " <td>41.866144383</td>\n",
+ " <td>-87.609999814</td>\n",
+ " <td>POINT (-87.609999814415 41.866144383387)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>16</td>\n",
+ " <td>2500 W 36TH ST</td>\n",
+ " <td>2019-06-09 18:46:30</td>\n",
+ " <td>2019-07-23 10:46:44</td>\n",
+ " <td>48</td>\n",
+ " <td>41.828545615</td>\n",
+ " <td>-87.687820304</td>\n",
+ " <td>POINT (-87.687820304125 41.828545615092)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>18</td>\n",
+ " <td>300 W CERMAK RD</td>\n",
+ " <td>2019-07-21 18:26:09</td>\n",
+ " <td>2019-07-23 15:06:49</td>\n",
+ " <td>18</td>\n",
+ " <td>41.85290813</td>\n",
+ " <td>-87.634936965</td>\n",
+ " <td>POINT (-87.634936964669 41.852908130256)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>20</td>\n",
+ " <td>328 N DESPLAINES ST</td>\n",
+ " <td>2019-07-22 10:05:41</td>\n",
+ " <td>2019-07-23 09:54:46</td>\n",
+ " <td>10</td>\n",
+ " <td>41.887721193</td>\n",
+ " <td>-87.644343544</td>\n",
+ " <td>POINT (-87.644343543685 41.88772119348)</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(2, u'5505 W WINDSOR AVE', datetime.datetime(2019, 4, 24, 14, 6, 4),
datetime.datetime(2019, 4, 26, 14, 5, 16), 5, 41.96306727, -87.765233061,
u'POINT (-87.765233060697 41.963067269624)'),\n",
+ " (4, u'400 E WALDRON DR', datetime.datetime(2019, 7, 23, 10, 0, 36),
datetime.datetime(2019, 7, 23, 16, 16, 48), 10, None, None, None),\n",
+ " (6, u'2101 S CANAL ST', datetime.datetime(2019, 7, 23, 9, 57, 55),
datetime.datetime(2019, 7, 23, 9, 58, 51), 12, 41.854048592, -87.638463921,
u'POINT (-87.638463920776 41.854048592448)'),\n",
+ " (8, u'3651 W AINSLIE ST', datetime.datetime(2019, 3, 15, 11, 43, 36),
datetime.datetime(2019, 4, 29, 14, 37, 2), 10, 41.970029, -87.7203, u'POINT
(-87.7203 41.970029)'),\n",
+ " (10, u'10346 S PULASKI RD', datetime.datetime(2019, 2, 28, 16, 42,
32), datetime.datetime(2019, 3, 5, 11, 14, 2), 1, None, None, None),\n",
+ " (12, u'521 E 91ST PL', datetime.datetime(2019, 6, 13, 10, 18, 24),
datetime.datetime(2019, 7, 23, 13, 49, 35), 12, 41.728137874, -87.610980602,
u'POINT (-87.610980602161 41.7281378741)'),\n",
+ " (14, u'801 E SOLIDARITY DR', datetime.datetime(2019, 7, 23, 10, 9,
20), datetime.datetime(2019, 7, 23, 10, 10, 10), 20, 41.866144383,
-87.609999814, u'POINT (-87.609999814415 41.866144383387)'),\n",
+ " (16, u'2500 W 36TH ST', datetime.datetime(2019, 6, 9, 18, 46, 30),
datetime.datetime(2019, 7, 23, 10, 46, 44), 48, 41.828545615, -87.687820304,
u'POINT (-87.687820304125 41.828545615092)'),\n",
+ " (18, u'300 W CERMAK RD', datetime.datetime(2019, 7, 21, 18, 26, 9),
datetime.datetime(2019, 7, 23, 15, 6, 49), 18, 41.85290813, -87.634936965,
u'POINT (-87.634936964669 41.852908130256)'),\n",
+ " (20, u'328 N DESPLAINES ST', datetime.datetime(2019, 7, 22, 10, 5,
41), datetime.datetime(2019, 7, 23, 9, 54, 46), 10, 41.887721193,
-87.644343544, u'POINT (-87.644343543685 41.88772119348)')]"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "select * from chicago_potholes_patched limit 10;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Transform to year - month format"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Done.\n",
+ "Done.\n",
+ "22 rows affected.\n",
+ "22 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>completion_year</th>\n",
+ " <th>completion_month</th>\n",
+ " <th>total_potholes_patched</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>7</td>\n",
+ " <td>25221</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>8</td>\n",
+ " <td>21251</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>9</td>\n",
+ " <td>13865</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>10</td>\n",
+ " <td>15853</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>11</td>\n",
+ " <td>13078</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>12</td>\n",
+ " <td>12287</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>1</td>\n",
+ " <td>32819</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>2</td>\n",
+ " <td>78509</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>3</td>\n",
+ " <td>135763</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>4</td>\n",
+ " <td>106023</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>5</td>\n",
+ " <td>55697</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>6</td>\n",
+ " <td>25742</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>7</td>\n",
+ " <td>16474</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>8</td>\n",
+ " <td>12880</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>9</td>\n",
+ " <td>19201</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>10</td>\n",
+ " <td>18079</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>11</td>\n",
+ " <td>19133</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>12</td>\n",
+ " <td>21691</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>1</td>\n",
+ " <td>58844</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>2</td>\n",
+ " <td>49768</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>3</td>\n",
+ " <td>67589</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>4</td>\n",
+ " <td>39150</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(2018, 7, 25221),\n",
+ " (2018, 8, 21251),\n",
+ " (2018, 9, 13865),\n",
+ " (2018, 10, 15853),\n",
+ " (2018, 11, 13078),\n",
+ " (2018, 12, 12287),\n",
+ " (2019, 1, 32819),\n",
+ " (2019, 2, 78509),\n",
+ " (2019, 3, 135763),\n",
+ " (2019, 4, 106023),\n",
+ " (2019, 5, 55697),\n",
+ " (2019, 6, 25742),\n",
+ " (2019, 7, 16474),\n",
+ " (2019, 8, 12880),\n",
+ " (2019, 9, 19201),\n",
+ " (2019, 10, 18079),\n",
+ " (2019, 11, 19133),\n",
+ " (2019, 12, 21691),\n",
+ " (2020, 1, 58844),\n",
+ " (2020, 2, 49768),\n",
+ " (2020, 3, 67589),\n",
+ " (2020, 4, 39150)]"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "\n",
+ "DROP TABLE IF EXISTS temp_total_potholes_patched;\n",
+ "\n",
+ "CREATE TABLE temp_total_potholes_patched(\n",
+ " completion_year INTEGER,\n",
+ " completion_month INTEGER,\n",
+ " total_potholes_patched INTEGER\n",
+ ");\n",
+ "\n",
+ "INSERT INTO temp_total_potholes_patched \n",
+ " SELECT date_part('year', completion_date) AS completion_year,\n",
+ " date_part('month', completion_date) AS completion_month,\n",
+ " sum(number_of_potholes_filled_on_block) AS total_potholes_patched\n",
+ " FROM chicago_potholes_patched\n",
+ " GROUP BY completion_year, completion_month;\n",
+ "\n",
+ "SELECT * FROM temp_total_potholes_patched order by completion_year,
completion_month;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "<a id=\"window\"></a>\n",
+ "## 3. Window functions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Potholes patched every month, compared to average potholes patched
for the year"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "22 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>completion_year</th>\n",
+ " <th>completion_month</th>\n",
+ " <th>total_potholes_patched</th>\n",
+ " <th>average_for_year</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>7</td>\n",
+ " <td>25221</td>\n",
+ " <td>16925.8333333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>8</td>\n",
+ " <td>21251</td>\n",
+ " <td>16925.8333333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>9</td>\n",
+ " <td>13865</td>\n",
+ " <td>16925.8333333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>10</td>\n",
+ " <td>15853</td>\n",
+ " <td>16925.8333333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>11</td>\n",
+ " <td>13078</td>\n",
+ " <td>16925.8333333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>12</td>\n",
+ " <td>12287</td>\n",
+ " <td>16925.8333333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>1</td>\n",
+ " <td>32819</td>\n",
+ " <td>45167.5833333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>2</td>\n",
+ " <td>78509</td>\n",
+ " <td>45167.5833333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>3</td>\n",
+ " <td>135763</td>\n",
+ " <td>45167.5833333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>4</td>\n",
+ " <td>106023</td>\n",
+ " <td>45167.5833333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>5</td>\n",
+ " <td>55697</td>\n",
+ " <td>45167.5833333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>6</td>\n",
+ " <td>25742</td>\n",
+ " <td>45167.5833333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>7</td>\n",
+ " <td>16474</td>\n",
+ " <td>45167.5833333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>8</td>\n",
+ " <td>12880</td>\n",
+ " <td>45167.5833333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>9</td>\n",
+ " <td>19201</td>\n",
+ " <td>45167.5833333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>10</td>\n",
+ " <td>18079</td>\n",
+ " <td>45167.5833333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>11</td>\n",
+ " <td>19133</td>\n",
+ " <td>45167.5833333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>12</td>\n",
+ " <td>21691</td>\n",
+ " <td>45167.5833333333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>1</td>\n",
+ " <td>58844</td>\n",
+ " <td>53837.75</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>2</td>\n",
+ " <td>49768</td>\n",
+ " <td>53837.75</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>3</td>\n",
+ " <td>67589</td>\n",
+ " <td>53837.75</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>4</td>\n",
+ " <td>39150</td>\n",
+ " <td>53837.75</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(2018, 7, 25221, Decimal('16925.8333333333')),\n",
+ " (2018, 8, 21251, Decimal('16925.8333333333')),\n",
+ " (2018, 9, 13865, Decimal('16925.8333333333')),\n",
+ " (2018, 10, 15853, Decimal('16925.8333333333')),\n",
+ " (2018, 11, 13078, Decimal('16925.8333333333')),\n",
+ " (2018, 12, 12287, Decimal('16925.8333333333')),\n",
+ " (2019, 1, 32819, Decimal('45167.5833333333')),\n",
+ " (2019, 2, 78509, Decimal('45167.5833333333')),\n",
+ " (2019, 3, 135763, Decimal('45167.5833333333')),\n",
+ " (2019, 4, 106023, Decimal('45167.5833333333')),\n",
+ " (2019, 5, 55697, Decimal('45167.5833333333')),\n",
+ " (2019, 6, 25742, Decimal('45167.5833333333')),\n",
+ " (2019, 7, 16474, Decimal('45167.5833333333')),\n",
+ " (2019, 8, 12880, Decimal('45167.5833333333')),\n",
+ " (2019, 9, 19201, Decimal('45167.5833333333')),\n",
+ " (2019, 10, 18079, Decimal('45167.5833333333')),\n",
+ " (2019, 11, 19133, Decimal('45167.5833333333')),\n",
+ " (2019, 12, 21691, Decimal('45167.5833333333')),\n",
+ " (2020, 1, 58844, Decimal('53837.75')),\n",
+ " (2020, 2, 49768, Decimal('53837.75')),\n",
+ " (2020, 3, 67589, Decimal('53837.75')),\n",
+ " (2020, 4, 39150, Decimal('53837.75'))]"
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "SELECT completion_year, completion_month, total_potholes_patched,\n",
+ "avg(total_potholes_patched) OVER (PARTITION BY completion_year) AS
average_for_year \n",
+ "FROM temp_total_potholes_patched\n",
+ "ORDER BY completion_year, completion_month;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png":
"iVBORw0KGgoAAAANSUhEUgAABJoAAAEyCAYAAACyHrqiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi41LCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvSM8oowAAIABJREFUeJzs3Xd81dX9x/HXySIkEEaAMAIk7BlGwhZZMlQURRwoswoOwFqtSmtbbWtba63WAuJkqiiiKFoVAUEQCBCmbAIkEGQEQhKSkH1+f+TKL8yErG/G+/l45MG955zv+b7vhccjl8893/M11lpEREREREREREQKy83pACIiIiIiIiIiUj6o0CQiIiIiIiIiIkVChSYRERERERERESkSKjSJiIiIiIiIiEiRUKFJRERERERERESKhApNIiIiIiIiIiJSJFRoEhERERERERGRIqFC
[...]
+ "text/plain": [
+ "<Figure size 1440x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "df = _.DataFrame()\n",
+ "\n",
+ "df['date'] = df.completion_year.map(\"{:04}\".format) +
df.completion_month.map(\"{:02}\".format)\n",
+ "\n",
+ "plt.figure(figsize=(20,5))\n",
+ "\n",
+ "plt.plot(df.date,df.total_potholes_patched, linestyle='solid');\n",
+ "plt.plot(df.date,df.average_for_year, linestyle='dashed');\n",
+ "plt.legend();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Potholes patched every month, compared to previous month"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "22 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>completion_year</th>\n",
+ " <th>completion_month</th>\n",
+ " <th>total_potholes_patched</th>\n",
+ " <th>potholes_patched_last_month</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>7</td>\n",
+ " <td>25221</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>8</td>\n",
+ " <td>21251</td>\n",
+ " <td>25221</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>9</td>\n",
+ " <td>13865</td>\n",
+ " <td>21251</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>10</td>\n",
+ " <td>15853</td>\n",
+ " <td>13865</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>11</td>\n",
+ " <td>13078</td>\n",
+ " <td>15853</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>12</td>\n",
+ " <td>12287</td>\n",
+ " <td>13078</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>1</td>\n",
+ " <td>32819</td>\n",
+ " <td>12287</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>2</td>\n",
+ " <td>78509</td>\n",
+ " <td>32819</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>3</td>\n",
+ " <td>135763</td>\n",
+ " <td>78509</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>4</td>\n",
+ " <td>106023</td>\n",
+ " <td>135763</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>5</td>\n",
+ " <td>55697</td>\n",
+ " <td>106023</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>6</td>\n",
+ " <td>25742</td>\n",
+ " <td>55697</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>7</td>\n",
+ " <td>16474</td>\n",
+ " <td>25742</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>8</td>\n",
+ " <td>12880</td>\n",
+ " <td>16474</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>9</td>\n",
+ " <td>19201</td>\n",
+ " <td>12880</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>10</td>\n",
+ " <td>18079</td>\n",
+ " <td>19201</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>11</td>\n",
+ " <td>19133</td>\n",
+ " <td>18079</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>12</td>\n",
+ " <td>21691</td>\n",
+ " <td>19133</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>1</td>\n",
+ " <td>58844</td>\n",
+ " <td>21691</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>2</td>\n",
+ " <td>49768</td>\n",
+ " <td>58844</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>3</td>\n",
+ " <td>67589</td>\n",
+ " <td>49768</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>4</td>\n",
+ " <td>39150</td>\n",
+ " <td>67589</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(2018, 7, 25221, None),\n",
+ " (2018, 8, 21251, 25221),\n",
+ " (2018, 9, 13865, 21251),\n",
+ " (2018, 10, 15853, 13865),\n",
+ " (2018, 11, 13078, 15853),\n",
+ " (2018, 12, 12287, 13078),\n",
+ " (2019, 1, 32819, 12287),\n",
+ " (2019, 2, 78509, 32819),\n",
+ " (2019, 3, 135763, 78509),\n",
+ " (2019, 4, 106023, 135763),\n",
+ " (2019, 5, 55697, 106023),\n",
+ " (2019, 6, 25742, 55697),\n",
+ " (2019, 7, 16474, 25742),\n",
+ " (2019, 8, 12880, 16474),\n",
+ " (2019, 9, 19201, 12880),\n",
+ " (2019, 10, 18079, 19201),\n",
+ " (2019, 11, 19133, 18079),\n",
+ " (2019, 12, 21691, 19133),\n",
+ " (2020, 1, 58844, 21691),\n",
+ " (2020, 2, 49768, 58844),\n",
+ " (2020, 3, 67589, 49768),\n",
+ " (2020, 4, 39150, 67589)]"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "SELECT completion_year, completion_month, total_potholes_patched,\n",
+ "lag(total_potholes_patched, 1) OVER (ORDER BY completion_year,
completion_month) AS potholes_patched_last_month\n",
+ "FROM temp_total_potholes_patched\n",
+ "ORDER BY completion_year, completion_month;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png":
"iVBORw0KGgoAAAANSUhEUgAABJoAAAEyCAYAAACyHrqiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi41LCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvSM8oowAAIABJREFUeJzs3Xd0VVX+/vH3TiOEXkIvCb2EBEgITboIKooiqEhVBEXUmXF0ZEZnmLHMT0dGLGBXmooKDogVEUVaEEKVXhMIUkIJJKQn+/dHrvnSk1ySnNzwvNbKyr377LPPc29YK5dP9tnbWGsRERERERERERG5Wl5OBxARERERERERkdJBhSYRERERERERESkUKjSJiIiIiIiIiEihUKFJREREREREREQKhQpNIiIiIiIiIiJSKFRoEhERERERERGRQqFCk4iIiIiIiIiIFAoVmkRE
[...]
+ "text/plain": [
+ "<Figure size 1440x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "df = _.DataFrame()\n",
+ "\n",
+ "df['date'] = df.completion_year.map(\"{:04}\".format) +
df.completion_month.map(\"{:02}\".format)\n",
+ "\n",
+ "plt.figure(figsize=(20,5))\n",
+ "\n",
+ "plt.plot(df.date,df.total_potholes_patched, linestyle='solid');\n",
+ "plt.plot(df.date,df.potholes_patched_last_month, linestyle='dashed');\n",
+ "plt.legend();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Accumulation - Total potholes patched to date"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "22 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>completion_year</th>\n",
+ " <th>completion_month</th>\n",
+ " <th>total_potholes_patched</th>\n",
+ " <th>total_potholes_patched_to_date</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>7</td>\n",
+ " <td>25221</td>\n",
+ " <td>25221</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>8</td>\n",
+ " <td>21251</td>\n",
+ " <td>46472</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>9</td>\n",
+ " <td>13865</td>\n",
+ " <td>60337</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>10</td>\n",
+ " <td>15853</td>\n",
+ " <td>76190</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>11</td>\n",
+ " <td>13078</td>\n",
+ " <td>89268</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2018</td>\n",
+ " <td>12</td>\n",
+ " <td>12287</td>\n",
+ " <td>101555</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>1</td>\n",
+ " <td>32819</td>\n",
+ " <td>134374</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>2</td>\n",
+ " <td>78509</td>\n",
+ " <td>212883</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>3</td>\n",
+ " <td>135763</td>\n",
+ " <td>348646</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>4</td>\n",
+ " <td>106023</td>\n",
+ " <td>454669</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>5</td>\n",
+ " <td>55697</td>\n",
+ " <td>510366</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>6</td>\n",
+ " <td>25742</td>\n",
+ " <td>536108</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>7</td>\n",
+ " <td>16474</td>\n",
+ " <td>552582</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>8</td>\n",
+ " <td>12880</td>\n",
+ " <td>565462</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>9</td>\n",
+ " <td>19201</td>\n",
+ " <td>584663</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>10</td>\n",
+ " <td>18079</td>\n",
+ " <td>602742</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>11</td>\n",
+ " <td>19133</td>\n",
+ " <td>621875</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>12</td>\n",
+ " <td>21691</td>\n",
+ " <td>643566</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>1</td>\n",
+ " <td>58844</td>\n",
+ " <td>702410</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>2</td>\n",
+ " <td>49768</td>\n",
+ " <td>752178</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>3</td>\n",
+ " <td>67589</td>\n",
+ " <td>819767</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020</td>\n",
+ " <td>4</td>\n",
+ " <td>39150</td>\n",
+ " <td>858917</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(2018, 7, 25221, 25221L),\n",
+ " (2018, 8, 21251, 46472L),\n",
+ " (2018, 9, 13865, 60337L),\n",
+ " (2018, 10, 15853, 76190L),\n",
+ " (2018, 11, 13078, 89268L),\n",
+ " (2018, 12, 12287, 101555L),\n",
+ " (2019, 1, 32819, 134374L),\n",
+ " (2019, 2, 78509, 212883L),\n",
+ " (2019, 3, 135763, 348646L),\n",
+ " (2019, 4, 106023, 454669L),\n",
+ " (2019, 5, 55697, 510366L),\n",
+ " (2019, 6, 25742, 536108L),\n",
+ " (2019, 7, 16474, 552582L),\n",
+ " (2019, 8, 12880, 565462L),\n",
+ " (2019, 9, 19201, 584663L),\n",
+ " (2019, 10, 18079, 602742L),\n",
+ " (2019, 11, 19133, 621875L),\n",
+ " (2019, 12, 21691, 643566L),\n",
+ " (2020, 1, 58844, 702410L),\n",
+ " (2020, 2, 49768, 752178L),\n",
+ " (2020, 3, 67589, 819767L),\n",
+ " (2020, 4, 39150, 858917L)]"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "select completion_year, completion_month, total_potholes_patched,\n",
+ "sum(total_potholes_patched) over (order by completion_year,
completion_month) as total_potholes_patched_to_date\n",
+ "From temp_total_potholes_patched\n",
+ "order by completion_year, completion_month;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/png":
"iVBORw0KGgoAAAANSUhEUgAABKgAAAE/CAYAAABihvt5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi41LCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvSM8oowAAIABJREFUeJzt3Xu4XFV9//H3lwQSIJBACCgJmIBQiIoBI1BFq2DlpgQtKKiAoCI/RFTqBa1KqMVCbcVSUYuAAlUBhVZUELkYFVouAQJyN0CUIEJMSAAFScL398deB4bDucw5ZLJnzrxfzzPPmVn79pl9JpmZ71lr7chMJEmSJEmSpLqsUXcASZIkSZIkdTcLVJIkSZIkSaqVBSpJkiRJkiTVygKVJEmSJEmSamWBSpIkSZIkSbWyQCVJkiRJkqRaWaCSJEn9ioixEZERMWU1HGubiFgx
[...]
+ "text/plain": [
+ "<Figure size 1440x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "df = _.DataFrame()\n",
+ "\n",
+ "df['date'] = df.completion_year.map(\"{:04}\".format) +
df.completion_month.map(\"{:02}\".format)\n",
+ "\n",
+ "\n",
+ "plt.figure(figsize=(20,5))\n",
+ "\n",
+ "p1 = plt.bar(df.date, df.total_potholes_patched)\n",
+ "p2 = plt.bar(df.date, df.total_potholes_patched_to_date,\n",
+ " bottom=df.total_potholes_patched)\n",
+ "\n",
+ "\n",
+ "plt.ylabel('Total potholes patched');\n",
+ "plt.title('Total potholes patched by month and year');\n",
+ "plt.legend((p1[0], p2[0]), ('Monthly', 'YTD'));\n",
+ "\n",
+ "plt.show();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Rank - Top 3 months of 2019"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "3 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>completion_year</th>\n",
+ " <th>completion_month</th>\n",
+ " <th>total_potholes_patched</th>\n",
+ " <th>rank</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>3</td>\n",
+ " <td>135763</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>4</td>\n",
+ " <td>106023</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019</td>\n",
+ " <td>2</td>\n",
+ " <td>78509</td>\n",
+ " <td>3</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(2019, 3, 135763, 1L), (2019, 4, 106023, 2L), (2019, 2, 78509, 3L)]"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM (\n",
+ " SELECT completion_year,\n",
+ " completion_month,\n",
+ " total_potholes_patched,\n",
+ " rank() OVER (PARTITION BY completion_year \n",
+ " ORDER BY total_potholes_patched
desc) AS rank\n",
+ " FROM temp_total_potholes_patched\n",
+ " ORDER BY completion_year, completion_month\n",
+ ") AS ranked_records\n",
+ "WHERE rank <= 3\n",
+ "AND completion_year = 2019;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/png":
"iVBORw0KGgoAAAANSUhEUgAAAnoAAAE/CAYAAADR125OAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi41LCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvSM8oowAAIABJREFUeJzt3XuUJlV57/HvTxBREbkNKAw4GDEEb0cYkXiLCoHxshxMvOBRGQ0ROWDUo4miiZKoJJpkBcMJmoNABOMRETVwIgQRvHsGGUBFQGXkIoNcRoab4m3gOX/Ubnlpu3tepvvtnqn+ftZ6V1ftvav2U2/XzDxTu3ZVqgpJkiT1zwPmOgBJkiSNhomeJElST5noSZIk9ZSJniRJUk+Z6EmSJPWUiZ4kSVJPmehJPZFk8ySVZOEs9LV7krXrue2SJCtnOqbZkuTGJM+YoX2dmuSv
[...]
+ "text/plain": [
+ "<Figure size 720x360 with 1 Axes>"
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "df = _.DataFrame()\n",
+ "\n",
+ "df['date'] = df.completion_year.map(\"{:04}\".format) +
df.completion_month.map(\"{:02}\".format)\n",
+ "\n",
+ "\n",
+ "plt.figure(figsize=(10,5))\n",
+ "\n",
+ "p1 = plt.bar(df.date, df.total_potholes_patched)\n",
+ "\n",
+ "\n",
+ "plt.ylabel('Total potholes patched');\n",
+ "plt.title('Total potholes patched by month and year');\n",
+ "\n",
+ "plt.show();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "<a id=\"mapping\"></a>\n",
+ "## 4. Mapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "10 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>completion_date</th>\n",
+ " <th>interval_bound</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-04-26 14:05:16</td>\n",
+ " <td>2019-04-26 14:05:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-07-23 16:16:48</td>\n",
+ " <td>2019-07-23 16:15:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-07-23 09:58:51</td>\n",
+ " <td>2019-07-23 09:55:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-04-29 14:37:02</td>\n",
+ " <td>2019-04-29 14:35:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-03-05 11:14:02</td>\n",
+ " <td>2019-03-05 11:10:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-07-23 13:49:35</td>\n",
+ " <td>2019-07-23 13:45:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-07-23 10:10:10</td>\n",
+ " <td>2019-07-23 10:10:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-07-23 10:46:44</td>\n",
+ " <td>2019-07-23 10:45:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-07-23 15:06:49</td>\n",
+ " <td>2019-07-23 15:05:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-07-23 09:54:46</td>\n",
+ " <td>2019-07-23 09:50:00</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(datetime.datetime(2019, 4, 26, 14, 5, 16), datetime.datetime(2019,
4, 26, 14, 5)),\n",
+ " (datetime.datetime(2019, 7, 23, 16, 16, 48), datetime.datetime(2019,
7, 23, 16, 15)),\n",
+ " (datetime.datetime(2019, 7, 23, 9, 58, 51), datetime.datetime(2019,
7, 23, 9, 55)),\n",
+ " (datetime.datetime(2019, 4, 29, 14, 37, 2), datetime.datetime(2019,
4, 29, 14, 35)),\n",
+ " (datetime.datetime(2019, 3, 5, 11, 14, 2), datetime.datetime(2019, 3,
5, 11, 10)),\n",
+ " (datetime.datetime(2019, 7, 23, 13, 49, 35), datetime.datetime(2019,
7, 23, 13, 45)),\n",
+ " (datetime.datetime(2019, 7, 23, 10, 10, 10), datetime.datetime(2019,
7, 23, 10, 10)),\n",
+ " (datetime.datetime(2019, 7, 23, 10, 46, 44), datetime.datetime(2019,
7, 23, 10, 45)),\n",
+ " (datetime.datetime(2019, 7, 23, 15, 6, 49), datetime.datetime(2019,
7, 23, 15, 5)),\n",
+ " (datetime.datetime(2019, 7, 23, 9, 54, 46), datetime.datetime(2019,
7, 23, 9, 50))]"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "SELECT completion_date, interval_bound(completion_date, '5 minutes')\n",
+ "FROM chicago_potholes_patched LIMIT 10;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Gap filling\n",
+ "Notice gap in days"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "10 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>date</th>\n",
+ " <th>sum</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-03 00:00:00</td>\n",
+ " <td>1605</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-04 00:00:00</td>\n",
+ " <td>2206</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-05 00:00:00</td>\n",
+ " <td>1814</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-06 00:00:00</td>\n",
+ " <td>1365</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-07 00:00:00</td>\n",
+ " <td>1973</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-10 00:00:00</td>\n",
+ " <td>514</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-11 00:00:00</td>\n",
+ " <td>1408</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-12 00:00:00</td>\n",
+ " <td>1714</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-13 00:00:00</td>\n",
+ " <td>1550</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-14 00:00:00</td>\n",
+ " <td>1415</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(datetime.datetime(2019, 6, 3, 0, 0), 1605L),\n",
+ " (datetime.datetime(2019, 6, 4, 0, 0), 2206L),\n",
+ " (datetime.datetime(2019, 6, 5, 0, 0), 1814L),\n",
+ " (datetime.datetime(2019, 6, 6, 0, 0), 1365L),\n",
+ " (datetime.datetime(2019, 6, 7, 0, 0), 1973L),\n",
+ " (datetime.datetime(2019, 6, 10, 0, 0), 514L),\n",
+ " (datetime.datetime(2019, 6, 11, 0, 0), 1408L),\n",
+ " (datetime.datetime(2019, 6, 12, 0, 0), 1714L),\n",
+ " (datetime.datetime(2019, 6, 13, 0, 0), 1550L),\n",
+ " (datetime.datetime(2019, 6, 14, 0, 0), 1415L)]"
+ ]
+ },
+ "execution_count": 60,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "\n",
+ "SELECT interval_bound(completion_date, '1 day') as date,
sum(number_of_potholes_filled_on_block)\n",
+ "FROM chicago_potholes_patched \n",
+ "WHERE completion_date >= '2019-06-01'\n",
+ "AND completion_date <= '2019-06-15'\n",
+ "GROUP BY date ORDER BY date;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Generate dates you want (with no gaps)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "15 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>date</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-01 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-02 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-03 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-04 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-05 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-06 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-07 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-08 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-09 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-10 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-11 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-12 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-13 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-14 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-15 00:00:00+00:00</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(datetime.datetime(2019, 6, 1, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),),\n",
+ " (datetime.datetime(2019, 6, 2, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),),\n",
+ " (datetime.datetime(2019, 6, 3, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),),\n",
+ " (datetime.datetime(2019, 6, 4, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),),\n",
+ " (datetime.datetime(2019, 6, 5, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),),\n",
+ " (datetime.datetime(2019, 6, 6, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),),\n",
+ " (datetime.datetime(2019, 6, 7, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),),\n",
+ " (datetime.datetime(2019, 6, 8, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),),\n",
+ " (datetime.datetime(2019, 6, 9, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),),\n",
+ " (datetime.datetime(2019, 6, 10, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),),\n",
+ " (datetime.datetime(2019, 6, 11, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),),\n",
+ " (datetime.datetime(2019, 6, 12, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),),\n",
+ " (datetime.datetime(2019, 6, 13, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),),\n",
+ " (datetime.datetime(2019, 6, 14, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),),\n",
+ " (datetime.datetime(2019, 6, 15, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)),)]"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "\n",
+ "SELECT date FROM generate_series('2019-06-01'::date,'2019-06-15','1 day')
date\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Gap filling query"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "15 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>date</th>\n",
+ " <th>sum</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-01 00:00:00+00:00</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-02 00:00:00+00:00</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-03 00:00:00+00:00</td>\n",
+ " <td>1605</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-04 00:00:00+00:00</td>\n",
+ " <td>2206</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-05 00:00:00+00:00</td>\n",
+ " <td>1814</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-06 00:00:00+00:00</td>\n",
+ " <td>1365</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-07 00:00:00+00:00</td>\n",
+ " <td>1973</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-08 00:00:00+00:00</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-09 00:00:00+00:00</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-10 00:00:00+00:00</td>\n",
+ " <td>514</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-11 00:00:00+00:00</td>\n",
+ " <td>1408</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-12 00:00:00+00:00</td>\n",
+ " <td>1714</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-13 00:00:00+00:00</td>\n",
+ " <td>1550</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-14 00:00:00+00:00</td>\n",
+ " <td>1415</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2019-06-15 00:00:00+00:00</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(datetime.datetime(2019, 6, 1, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 0L),\n",
+ " (datetime.datetime(2019, 6, 2, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 0L),\n",
+ " (datetime.datetime(2019, 6, 3, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 1605L),\n",
+ " (datetime.datetime(2019, 6, 4, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 2206L),\n",
+ " (datetime.datetime(2019, 6, 5, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 1814L),\n",
+ " (datetime.datetime(2019, 6, 6, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 1365L),\n",
+ " (datetime.datetime(2019, 6, 7, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 1973L),\n",
+ " (datetime.datetime(2019, 6, 8, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 0L),\n",
+ " (datetime.datetime(2019, 6, 9, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 0L),\n",
+ " (datetime.datetime(2019, 6, 10, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 514L),\n",
+ " (datetime.datetime(2019, 6, 11, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 1408L),\n",
+ " (datetime.datetime(2019, 6, 12, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 1714L),\n",
+ " (datetime.datetime(2019, 6, 13, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 1550L),\n",
+ " (datetime.datetime(2019, 6, 14, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 1415L),\n",
+ " (datetime.datetime(2019, 6, 15, 0, 0,
tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)), 0L)]"
+ ]
+ },
+ "execution_count": 62,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "WITH\n",
+ " bounded AS (\n",
+ " SELECT interval_bound(completion_date, '1 day') AS
date,number_of_potholes_filled_on_block\n",
+ " FROM chicago_potholes_patched\n",
+ " WHERE completion_date >= '2019-06-01' AND completion_date <=
'2019-06-15'\n",
+ " ),\n",
+ " dense AS (\n",
+ " SELECT slice AS date\n",
+ " FROM generate_series('2019-06-01'::date,'2019-06-15','1 day')
s(slice)\n",
+ " )\n",
+ "SELECT date, sum(coalesce(number_of_potholes_filled_on_block,0))\n",
+ "FROM bounded NATURAL RIGHT JOIN dense\n",
+ "GROUP BY date ORDER BY date;"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 2",
+ "language": "python",
+ "name": "python2"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.16"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}