This is an automated email from the ASF dual-hosted git repository.
johnbodley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
The following commit(s) were added to refs/heads/master by this push:
new 6d7701e [fix] Updating Pandas resample logic (#7799)
6d7701e is described below
commit 6d7701e68493cc2eb04f1486c1c9f27037835a3c
Author: John Bodley <[email protected]>
AuthorDate: Tue Jul 2 11:10:50 2019 -0700
[fix] Updating Pandas resample logic (#7799)
---
.../assets/src/explore/controlPanels/sections.jsx | 2 +-
superset/assets/src/explore/controls.jsx | 19 +---
.../migrations/versions/ab8c66efdd01_resample.py | 117 +++++++++++++++++++++
superset/viz.py | 16 ++-
tests/viz_tests.py | 33 ++++++
5 files changed, 163 insertions(+), 24 deletions(-)
diff --git a/superset/assets/src/explore/controlPanels/sections.jsx b/superset/assets/src/explore/controlPanels/sections.jsx
index e4ad95c..7df0488 100644
--- a/superset/assets/src/explore/controlPanels/sections.jsx
+++ b/superset/assets/src/explore/controlPanels/sections.jsx
@@ -97,7 +97,7 @@ export const NVD3TimeSeries = [
['time_compare', 'comparison_type'],
[<h1 className="section-header">{t('Python Functions')}</h1>],
[<h2 className="section-header">pandas.resample</h2>],
- ['resample_how', 'resample_rule', 'resample_fillmethod'],
+ ['resample_rule', 'resample_method'],
],
},
];
diff --git a/superset/assets/src/explore/controls.jsx b/superset/assets/src/explore/controls.jsx
index 2ae104b..6f3578b 100644
--- a/superset/assets/src/explore/controls.jsx
+++ b/superset/assets/src/explore/controls.jsx
@@ -932,26 +932,17 @@ export const controls = {
freeForm: true,
label: t('Rule'),
default: null,
- choices: formatSelectOptions(['', '1T', '1H', '1D', '7D', '1M', '1AS']),
+ choices: formatSelectOptions(['1T', '1H', '1D', '7D', '1M', '1AS']),
description: t('Pandas resample rule'),
},
- resample_how: {
+ resample_method: {
type: 'SelectControl',
freeForm: true,
- label: t('How'),
+ label: t('Method'),
default: null,
- choices: formatSelectOptions(['', 'mean', 'sum', 'median']),
- description: t('Pandas resample how'),
- },
-
- resample_fillmethod: {
- type: 'SelectControl',
- freeForm: true,
- label: t('Fill Method'),
- default: null,
- choices: formatSelectOptions(['', 'ffill', 'bfill']),
- description: t('Pandas resample fill method'),
+ choices: formatSelectOptions(['asfreq', 'bfill', 'ffill', 'median', 'mean', 'sum']),
+ description: t('Pandas resample method'),
},
time_range: {
diff --git a/superset/migrations/versions/ab8c66efdd01_resample.py b/superset/migrations/versions/ab8c66efdd01_resample.py
new file mode 100644
index 0000000..aa7bf86
--- /dev/null
+++ b/superset/migrations/versions/ab8c66efdd01_resample.py
@@ -0,0 +1,117 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""resample
+
+Revision ID: ab8c66efdd01
+Revises: d7c1a0d6f2da
+Create Date: 2019-06-28 13:17:59.517089
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = "ab8c66efdd01"
+down_revision = "d7c1a0d6f2da"
+
+import json
+import logging
+
+from alembic import op
+from sqlalchemy import Column, Integer, Text
+from sqlalchemy.ext.declarative import declarative_base
+
+from superset import db
+
+Base = declarative_base()
+
+
+class Slice(Base):
+ # Lightweight declarative model of the "slices" table that declares only the
+ # two columns this migration touches (id and params).
+ __tablename__ = "slices"
+
+ id = Column(Integer, primary_key=True)
+ # JSON-encoded chart parameters; upgrade()/downgrade() parse this with
+ # json.loads and write it back with json.dumps.
+ params = Column(Text)
+
+
+def upgrade():
+ bind = op.get_bind()
+ session = db.Session(bind=bind)
+
+ for slc in session.query(Slice).all():
+ try:
+ params = json.loads(slc.params)
+
+ # Note that the resample params could be encoded as empty strings.
+ if "resample_rule" in params:
+ rule = params["resample_rule"]
+
+ # Per the old logic how takes precedence over fill-method.
Note that
+ # due to UI options, alongside None, empty strings were viable
choices
+ # hence the truthiness checks.
+ if rule:
+ how = None
+
+ if "resample_how" in params:
+ how = params["resample_how"]
+
+ if how:
+ params["resample_method"] = how
+
+ if not how and "fill_method" in params:
+ fill_method = params["resample_fillmethod"]
+
+ if fill_method:
+ params["resample_method"] = fill_method
+
+ # Ensure that the resample logic is fully defined.
+ if not "resample_method" in params:
+ del params["resample_rule"]
+ else:
+ del params["resample_rule"]
+
+ # Finally remove any erroneous legacy fields.
+ params.pop("resample_fillmethod", None)
+ params.pop("resample_how", None)
+ slc.params = json.dumps(params, sort_keys=True)
+ except Exception as e:
+ logging.exception(e)
+
+ session.commit()
+ session.close()
+
+
+def downgrade():
+ # Reverse of upgrade(): for every slice, move the unified "resample_method"
+ # param back into one of the two legacy params and re-serialize the result.
+ bind = op.get_bind()
+ session = db.Session(bind=bind)
+
+ for slc in session.query(Slice).all():
+ try:
+ params = json.loads(slc.params)
+
+ if "resample_method" in params:
+ method = params["resample_method"]
+
+ # These three values are routed to the legacy fill-method param; every
+ # other value is treated as a legacy "how" aggregation.
+ if method in ["asfreq", "bfill", "ffill"]:
+ params["resample_fillmethod"] = method
+ else:
+ params["resample_how"] = method
+
+ del params["resample_method"]
+ slc.params = json.dumps(params, sort_keys=True)
+ except Exception as e:
+ # Any failure for a slice is logged; slc.params is only assigned as the
+ # last step of the try body, so a failing slice keeps its stored params.
+ logging.exception(e)
+
+ session.commit()
+ session.close()
diff --git a/superset/viz.py b/superset/viz.py
index b8d7770..7ffd753 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -1143,11 +1143,13 @@ class NVD3TimeSeriesViz(NVD3Viz):
if fd.get("granularity") == "all":
raise Exception(_("Pick a time granularity for your time series"))
- if not aggregate:
+ if aggregate:
df = df.pivot_table(
index=DTTM_ALIAS,
columns=fd.get("groupby"),
values=self.metric_labels,
+ fill_value=0,
+ aggfunc=sum,
dropna=False,
)
else:
@@ -1155,18 +1157,14 @@ class NVD3TimeSeriesViz(NVD3Viz):
index=DTTM_ALIAS,
columns=fd.get("groupby"),
values=self.metric_labels,
- fill_value=0,
- aggfunc=sum,
dropna=False,
)
- fm = fd.get("resample_fillmethod")
- if not fm:
- fm = None
- how = fd.get("resample_how")
rule = fd.get("resample_rule")
- if how and rule:
- df = df.resample(rule, how=how, fill_method=fm)
+ method = fd.get("resample_method")
+
+ if rule and method:
+ df = getattr(df.resample(rule), method)()
if self.sort_series:
dfs = df.sum()
diff --git a/tests/viz_tests.py b/tests/viz_tests.py
index 085398e..136fdf8 100644
--- a/tests/viz_tests.py
+++ b/tests/viz_tests.py
@@ -18,6 +18,7 @@ from datetime import datetime
from unittest.mock import Mock, patch
import uuid
+import numpy as np
import pandas as pd
from superset import app
@@ -1047,3 +1048,35 @@ class TimeSeriesVizTestCase(SupersetTestCase):
},
]
self.assertEqual(expected, viz_data)
+
+ def test_process_data_resample(self):
+ datasource = self.get_datasource_mock()
+
+ df = pd.DataFrame(
+ {
+ "__timestamp": pd.to_datetime(
+ ["2019-01-01", "2019-01-02", "2019-01-05", "2019-01-07"]
+ ),
+ "y": [1.0, 2.0, 5.0, 7.0],
+ }
+ )
+
+ self.assertEqual(
+ viz.NVD3TimeSeriesViz(
+ datasource,
+ {"metrics": ["y"], "resample_method": "sum", "resample_rule":
"1D"},
+ )
+ .process_data(df)["y"]
+ .tolist(),
+ [1.0, 2.0, 0.0, 0.0, 5.0, 0.0, 7.0],
+ )
+
+ np.testing.assert_equal(
+ viz.NVD3TimeSeriesViz(
+ datasource,
+ {"metrics": ["y"], "resample_method": "asfreq",
"resample_rule": "1D"},
+ )
+ .process_data(df)["y"]
+ .tolist(),
+ [1.0, 2.0, np.nan, np.nan, 5.0, np.nan, 7.0],
+ )