mistercrunch closed pull request #4651: [explore] proper filtering of NULLs and
''
URL: https://github.com/apache/incubator-superset/pull/4651
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/superset/assets/package.json b/superset/assets/package.json
index ceb34c74a2..c4911b3101 100644
--- a/superset/assets/package.json
+++ b/superset/assets/package.json
@@ -74,6 +74,7 @@
"moment": "^2.20.1",
"mousetrap": "^1.6.1",
"mustache": "^2.2.1",
+ "npm": "^5.7.1",
"nvd3": "1.8.6",
"object.entries": "^1.0.4",
"object.keys": "^0.1.0",
diff --git
a/superset/assets/spec/javascripts/explore/components/FilterControl_spec.jsx
b/superset/assets/spec/javascripts/explore/components/FilterControl_spec.jsx
index 69f8b2794f..2b83fff457 100644
--- a/superset/assets/spec/javascripts/explore/components/FilterControl_spec.jsx
+++ b/superset/assets/spec/javascripts/explore/components/FilterControl_spec.jsx
@@ -225,12 +225,13 @@ describe('FilterControl', () => {
wrapper.instance().fetchFilterValues(0, 'col1');
expect(wrapper.state().activeRequest).to.equal(spyReq);
// Sets active to null after success
- $.ajax.getCall(0).args[0].success('choices');
+ $.ajax.getCall(0).args[0].success(['opt1', 'opt2', null, '']);
expect(wrapper.state().filters[0].valuesLoading).to.equal(false);
- expect(wrapper.state().filters[0].valueChoices).to.equal('choices');
+ expect(wrapper.state().filters[0].valueChoices).to.deep.equal(['opt1',
'opt2', null, '']);
expect(wrapper.state().activeRequest).to.equal(null);
});
+
it('cancels active request if another is submitted', () => {
const spyReq = sinon.spy();
spyReq.abort = sinon.spy();
diff --git
a/superset/assets/spec/javascripts/explore/components/Filter_spec.jsx
b/superset/assets/spec/javascripts/explore/components/Filter_spec.jsx
index 1a4c395807..27425de9a3 100644
--- a/superset/assets/spec/javascripts/explore/components/Filter_spec.jsx
+++ b/superset/assets/spec/javascripts/explore/components/Filter_spec.jsx
@@ -46,7 +46,7 @@ describe('Filter', () => {
expect(wrapper.find(Select)).to.have.lengthOf(2);
expect(wrapper.find(Button)).to.have.lengthOf(1);
expect(wrapper.find(SelectControl)).to.have.lengthOf(1);
- expect(wrapper.find('#select-op').prop('options')).to.have.lengthOf(8);
+ expect(wrapper.find('#select-op').prop('options')).to.have.lengthOf(10);
});
it('renders five op choices for table datasource', () => {
@@ -58,7 +58,7 @@ describe('Filter', () => {
filterable_cols: ['country_name'],
};
const druidWrapper = shallow(<Filter {...props} />);
-
expect(druidWrapper.find('#select-op').prop('options')).to.have.lengthOf(9);
+
expect(druidWrapper.find('#select-op').prop('options')).to.have.lengthOf(11);
});
it('renders six op choices for having filter', () => {
diff --git a/superset/assets/spec/javascripts/utils/common_spec.jsx
b/superset/assets/spec/javascripts/utils/common_spec.jsx
index 5aa4b4334f..861c38ae53 100644
--- a/superset/assets/spec/javascripts/utils/common_spec.jsx
+++ b/superset/assets/spec/javascripts/utils/common_spec.jsx
@@ -1,6 +1,6 @@
import { it, describe } from 'mocha';
import { expect } from 'chai';
-import { isTruthy } from '../../../src/utils/common';
+import { isTruthy, optionFromValue } from '../../../src/utils/common';
describe('utils/common', () => {
describe('isTruthy', () => {
@@ -40,4 +40,14 @@ describe('utils/common', () => {
expect(isTruthy('false')).to.equal(false);
});
});
+ describe('optionFromValue', () => {
+ it('converts values as expected', () => {
+ expect(optionFromValue(false)).to.deep.equal({ value: false, label:
'<false>' });
+ expect(optionFromValue(true)).to.deep.equal({ value: true, label:
'<true>' });
+ expect(optionFromValue(null)).to.deep.equal({ value: '<NULL>', label:
'<NULL>' });
+ expect(optionFromValue('')).to.deep.equal({ value: '', label: '<empty
string>' });
+ expect(optionFromValue('foo')).to.deep.equal({ value: 'foo', label:
'foo' });
+ expect(optionFromValue(5)).to.deep.equal({ value: 5, label: '5' });
+ });
+ });
});
diff --git a/superset/assets/src/SqlLab/actions.js
b/superset/assets/src/SqlLab/actions.js
index 04a9a5eae4..644947023b 100644
--- a/superset/assets/src/SqlLab/actions.js
+++ b/superset/assets/src/SqlLab/actions.js
@@ -2,6 +2,7 @@
import shortid from 'shortid';
import { now } from '../modules/dates';
import { t } from '../locales';
+import { COMMON_ERR_MESSAGES } from '../common';
const $ = require('jquery');
@@ -163,7 +164,7 @@ export function runQuery(query) {
}
}
if (msg.indexOf('CSRF token') > 0) {
- msg = t('Your session timed out, please refresh your page and try
again.');
+ msg = COMMON_ERR_MESSAGES.SESSION_TIMED_OUT;
}
dispatch(queryFailed(query, msg));
},
diff --git a/superset/assets/src/chart/chartAction.js
b/superset/assets/src/chart/chartAction.js
index b9338a91f5..a2f01165d6 100644
--- a/superset/assets/src/chart/chartAction.js
+++ b/superset/assets/src/chart/chartAction.js
@@ -1,6 +1,7 @@
import { getExploreUrlAndPayload, getAnnotationJsonUrl } from
'../explore/exploreUtils';
import { requiresQuery, ANNOTATION_SOURCE_TYPES } from
'../modules/AnnotationTypes';
import { Logger, LOG_ACTIONS_LOAD_EVENT } from '../logger';
+import { COMMON_ERR_MESSAGES } from '../common';
const $ = window.$ = require('jquery');
@@ -160,12 +161,16 @@ export function runQuery(formData, force = false, timeout
= 60, key) {
errObject = err.responseJSON;
} else if (err.stack) {
errObject = {
- error: 'Unexpected error: ' + err.description,
+ error: t('Unexpected error: ') + err.description,
stacktrace: err.stack,
};
+ } else if (err.responseText && err.responseText.indexOf('CSRF') >=
0) {
+ errObject = {
+ error: COMMON_ERR_MESSAGES.SESSION_TIMED_OUT,
+ };
} else {
errObject = {
- error: 'Unexpected error.',
+ error: t('Unexpected error.'),
};
}
dispatch(chartUpdateFailed(errObject, key));
diff --git a/superset/assets/src/common.js b/superset/assets/src/common.js
index d84f064065..cc509eb892 100644
--- a/superset/assets/src/common.js
+++ b/superset/assets/src/common.js
@@ -1,5 +1,6 @@
/* eslint-disable global-require */
import $ from 'jquery';
+import { t } from './locales';
const utils = require('./modules/utils');
@@ -30,3 +31,8 @@ export function appSetup() {
window.jQuery = $;
require('bootstrap');
}
+
+// Error messages used in many places across applications
+export const COMMON_ERR_MESSAGES = {
+ SESSION_TIMED_OUT: t('Your session timed out, please refresh your page and
try again.'),
+};
diff --git a/superset/assets/src/components/AlteredSliceTag.jsx
b/superset/assets/src/components/AlteredSliceTag.jsx
index eb24424e8d..ad1356b48b 100644
--- a/superset/assets/src/components/AlteredSliceTag.jsx
+++ b/superset/assets/src/components/AlteredSliceTag.jsx
@@ -61,7 +61,7 @@ export default class AlteredSliceTag extends React.Component {
return '[]';
}
return value.map((v) => {
- const filterVal = v.val.constructor === Array ? `[${v.val.join(',
')}]` : v.val;
+ const filterVal = v.val && v.val.constructor === Array ?
`[${v.val.join(', ')}]` : v.val;
return `${v.col} ${v.op} ${filterVal}`;
}).join(', ');
} else if (controls[key] && controls[key].type === 'BoundsControl') {
diff --git a/superset/assets/src/explore/components/controls/Filter.jsx
b/superset/assets/src/explore/components/controls/Filter.jsx
index 49a9751f3b..539a4133ae 100644
--- a/superset/assets/src/explore/components/controls/Filter.jsx
+++ b/superset/assets/src/explore/components/controls/Filter.jsx
@@ -2,8 +2,8 @@ import React from 'react';
import PropTypes from 'prop-types';
import Select from 'react-select';
import { Button, Row, Col } from 'react-bootstrap';
-import SelectControl from './SelectControl';
import { t } from '../../../locales';
+import SelectControl from './SelectControl';
const operatorsArr = [
{ val: 'in', type: 'array', useSelect: true, multi: true },
@@ -16,6 +16,8 @@ const operatorsArr = [
{ val: '<', type: 'string', havingOnly: true },
{ val: 'regex', type: 'string', datasourceTypes: ['druid'] },
{ val: 'LIKE', type: 'string', datasourceTypes: ['table'] },
+ { val: 'IS NULL', type: null },
+ { val: 'IS NOT NULL', type: null },
];
const operators = {};
operatorsArr.forEach((op) => {
@@ -90,6 +92,10 @@ export default class Filter extends React.Component {
renderFilterFormControl(filter) {
const operator = operators[filter.op];
+ if (operator.type === null) {
+ // IS NULL or IS NOT NULL
+ return null;
+ }
if (operator.useSelect && !this.props.having) {
// TODO should use a simple Select, not a control here...
return (
diff --git a/superset/assets/src/utils/common.js
b/superset/assets/src/utils/common.js
index f2c3bd24ce..093882aa90 100644
--- a/superset/assets/src/utils/common.js
+++ b/superset/assets/src/utils/common.js
@@ -103,3 +103,30 @@ export function isTruthy(obj) {
}
return !!obj;
}
+
+export function optionLabel(opt) {
+ if (opt === null) {
+ return '<NULL>';
+ } else if (opt === '') {
+ return '<empty string>';
+ } else if (opt === true) {
+ return '<true>';
+ } else if (opt === false) {
+ return '<false>';
+ } else if (typeof opt !== 'string' && opt.toString) {
+ return opt.toString();
+ }
+ return opt;
+}
+
+export function optionValue(opt) {
+ if (opt === null) {
+ return '<NULL>';
+ }
+ return opt;
+}
+
+export function optionFromValue(opt) {
+ // From a list of options, handles special values & labels
+ return { value: optionValue(opt), label: optionLabel(opt) };
+}
diff --git a/superset/connectors/base/models.py
b/superset/connectors/base/models.py
index 68a020e36a..8e4a2a2245 100644
--- a/superset/connectors/base/models.py
+++ b/superset/connectors/base/models.py
@@ -6,6 +6,7 @@
import json
+from past.builtins import basestring
from sqlalchemy import (
and_, Boolean, Column, Integer, String, Text,
)
@@ -185,6 +186,35 @@ def data(self):
'verbose_map': verbose_map,
}
+ @staticmethod
+ def filter_values_handler(
+ values, target_column_is_numeric=False, is_list_target=False):
+ def handle_single_value(v):
+ # backward compatibility with previous <select> components
+ if isinstance(v, basestring):
+ v = v.strip('\t\n \'"')
+ if target_column_is_numeric:
+ # For backwards compatibility and edge cases
+ # where a column data type might have changed
+ v = utils.string_to_num(v)
+ if v == '<NULL>':
+ return None
+ elif v == '<empty string>':
+ return ''
+ return v
+ if isinstance(values, (list, tuple)):
+ values = [handle_single_value(v) for v in values]
+ else:
+ values = handle_single_value(values)
+ if is_list_target and not isinstance(values, (tuple, list)):
+ values = [values]
+ elif not is_list_target and isinstance(values, (tuple, list)):
+ if len(values) > 0:
+ values = values[0]
+ else:
+ values = None
+ return values
+
def get_query_str(self, query_obj):
"""Returns a query as a string
diff --git a/superset/connectors/druid/models.py
b/superset/connectors/druid/models.py
index 25c68acae1..26e3c721a0 100644
--- a/superset/connectors/druid/models.py
+++ b/superset/connectors/druid/models.py
@@ -1277,6 +1277,21 @@ def run_query( # noqa / druid
client.query_builder.last_query.query_dict, indent=2)
return query_str
+ @staticmethod
+ def homogenize_types(df, groupby_cols):
+ """Converting all GROUPBY columns to strings
+
+ When grouping by a numeric (say FLOAT) column, pydruid returns
+ strings in the dataframe. This creates issues downstream related
+ to having mixed types in the dataframe
+
+ Here we replace None with <NULL> and make the whole series a
+ str instead of an object.
+ """
+ for col in groupby_cols:
+ df[col] = df[col].fillna('<NULL>').astype(str)
+ return df
+
def query(self, query_obj):
qry_start_dttm = datetime.now()
client = self.cluster.get_pydruid_client()
@@ -1284,6 +1299,8 @@ def query(self, query_obj):
client=client, query_obj=query_obj, phase=2)
df = client.export_pandas()
+ df = self.homogenize_types(df, query_obj.get('groupby', []))
+
if df is None or df.size == 0:
raise Exception(_('No data was returned.'))
df.columns = [
@@ -1322,40 +1339,31 @@ def increment_timestamp(ts):
query=query_str,
duration=datetime.now() - qry_start_dttm)
- @staticmethod
- def get_filters(raw_filters, num_cols): # noqa
+ @classmethod
+ def get_filters(cls, raw_filters, num_cols): # noqa
+ """Given Superset filter data structure, returns pydruid Filter(s)"""
filters = None
for flt in raw_filters:
- if not all(f in flt for f in ['col', 'op', 'val']):
+ col = flt.get('col')
+ op = flt.get('op')
+ eq = flt.get('val')
+ if (
+ not col or
+ not op or
+ (eq is None and op not in ('IS NULL', 'IS NOT NULL'))):
continue
-
- col = flt['col']
- op = flt['op']
- eq = flt['val']
cond = None
- if op in ('in', 'not in'):
- eq = [
- types.replace('"', '').strip()
- if isinstance(types, string_types)
- else types
- for types in eq]
- elif not isinstance(flt['val'], string_types):
- eq = eq[0] if eq and len(eq) > 0 else ''
-
is_numeric_col = col in num_cols
- if is_numeric_col:
- if op in ('in', 'not in'):
- eq = [utils.string_to_num(v) for v in eq]
- else:
- eq = utils.string_to_num(eq)
-
+ is_list_target = op in ('in', 'not in')
+ eq = cls.filter_values_handler(
+ eq, is_list_target=is_list_target,
+ target_column_is_numeric=is_numeric_col)
if op == '==':
cond = Dimension(col) == eq
elif op == '!=':
cond = Dimension(col) != eq
elif op in ('in', 'not in'):
fields = []
-
# ignore the filter if it has no value
if not len(eq):
continue
@@ -1365,10 +1373,8 @@ def get_filters(raw_filters, num_cols): # noqa
for s in eq:
fields.append(Dimension(col) == s)
cond = Filter(type='or', fields=fields)
-
if op == 'not in':
cond = ~cond
-
elif op == 'regex':
cond = Filter(type='regex', pattern=eq, dimension=col)
elif op == '>=':
@@ -1385,6 +1391,10 @@ def get_filters(raw_filters, num_cols): # noqa
col, None, eq,
upperStrict=True, alphaNumeric=is_numeric_col,
)
+ elif op == 'IS NULL':
+ cond = Dimension(col) == None # NOQA
+ elif op == 'IS NOT NULL':
+ cond = Dimension(col) != None # NOQA
if filters:
filters = Filter(type='and', fields=[
diff --git a/superset/connectors/sqla/models.py
b/superset/connectors/sqla/models.py
index 7fbb7ae44d..86528a3c06 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -11,7 +11,6 @@
from flask_appbuilder import Model
from flask_babel import lazy_gettext as _
import pandas as pd
-from past.builtins import basestring
import six
import sqlalchemy as sa
from sqlalchemy import (
@@ -574,28 +573,21 @@ def get_sqla_query( # sqla
where_clause_and = []
having_clause_and = []
for flt in filter:
- if not all([flt.get(s) for s in ['col', 'op', 'val']]):
+ if not all([flt.get(s) for s in ['col', 'op']]):
continue
col = flt['col']
op = flt['op']
- eq = flt['val']
col_obj = cols.get(col)
+ is_list_target = op in ('in', 'not in')
+ eq = self.filter_values_handler(
+ flt.get('val'),
+ target_column_is_numeric=col_obj.is_num,
+ is_list_target=is_list_target)
if col_obj:
if op in ('in', 'not in'):
- values = []
- for v in eq:
- # For backwards compatibility and edge cases
- # where a column data type might have changed
- if isinstance(v, basestring):
- v = v.strip("'").strip('"')
- if col_obj.is_num:
- v = utils.string_to_num(v)
-
- # Removing empty strings and non numeric values
- # targeting numeric columns
- if v is not None:
- values.append(v)
- cond = col_obj.sqla_col.in_(values)
+ cond = col_obj.sqla_col.in_(eq)
+ if '<NULL>' in eq:
+ cond = or_(cond, col_obj.sqla_col == None) # noqa
if op == 'not in':
cond = ~cond
where_clause_and.append(cond)
@@ -616,6 +608,10 @@ def get_sqla_query( # sqla
where_clause_and.append(col_obj.sqla_col <= eq)
elif op == 'LIKE':
where_clause_and.append(col_obj.sqla_col.like(eq))
+ elif op == 'IS NULL':
+ where_clause_and.append(col_obj.sqla_col == None) #
noqa
+ elif op == 'IS NOT NULL':
+ where_clause_and.append(col_obj.sqla_col != None) #
noqa
if extras:
where = extras.get('where')
if where:
diff --git a/tests/druid_func_tests.py b/tests/druid_func_tests.py
index 3082bec2b3..c367bd7ad7 100644
--- a/tests/druid_func_tests.py
+++ b/tests/druid_func_tests.py
@@ -34,7 +34,7 @@ class DruidFuncTestCase(unittest.TestCase):
def test_get_filters_ignores_invalid_filter_objects(self):
filtr = {'col': 'col1', 'op': '=='}
filters = [filtr]
- self.assertEqual(None, DruidDatasource.get_filters(filters, []))
+ self.assertIsNone(DruidDatasource.get_filters(filters, []))
def test_get_filters_constructs_filter_in(self):
filtr = {'col': 'A', 'op': 'in', 'val': ['a', 'b', 'c']}
@@ -108,7 +108,7 @@ def
test_get_filters_ignores_in_not_in_with_empty_value(self):
filtr1 = {'col': 'A', 'op': 'in', 'val': []}
filtr2 = {'col': 'A', 'op': 'not in', 'val': []}
res = DruidDatasource.get_filters([filtr1, filtr2], [])
- self.assertEqual(None, res)
+ self.assertIsNone(res)
def test_get_filters_constructs_equals_for_in_not_in_single_value(self):
filtr = {'col': 'A', 'op': 'in', 'val': ['a']}
@@ -119,14 +119,15 @@ def
test_get_filters_handles_arrays_for_string_types(self):
filtr = {'col': 'A', 'op': '==', 'val': ['a', 'b']}
res = DruidDatasource.get_filters([filtr], [])
self.assertEqual('a', res.filter['filter']['value'])
+
filtr = {'col': 'A', 'op': '==', 'val': []}
res = DruidDatasource.get_filters([filtr], [])
- self.assertEqual('', res.filter['filter']['value'])
+ self.assertIsNone(res.filter['filter']['value'])
def test_get_filters_handles_none_for_string_types(self):
filtr = {'col': 'A', 'op': '==', 'val': None}
res = DruidDatasource.get_filters([filtr], [])
- self.assertEqual('', res.filter['filter']['value'])
+ self.assertIsNone(res)
def test_get_filters_extracts_values_in_quotes(self):
filtr = {'col': 'A', 'op': 'in', 'val': [' "a" ']}
diff --git a/tests/druid_tests.py b/tests/druid_tests.py
index d03378b0e4..6947a6cd7d 100644
--- a/tests/druid_tests.py
+++ b/tests/druid_tests.py
@@ -61,6 +61,7 @@ def __reduce__(self):
'timestamp': '2012-01-01T00:00:00.000Z',
'event': {
'dim1': 'Canada',
+ 'dim2': 'boy',
'metric1': 12345678,
},
},
@@ -69,6 +70,7 @@ def __reduce__(self):
'timestamp': '2012-01-01T00:00:00.000Z',
'event': {
'dim1': 'USA',
+ 'dim2': 'girl',
'metric1': 12345678 / 2,
},
},
@@ -165,7 +167,7 @@ def test_client(self, PyDruid):
'row_limit': 5000,
'include_search': 'false',
'metrics': ['count'],
- 'groupby': ['dim1', 'dim2d'],
+ 'groupby': ['dim1', 'dim2'],
'force': 'true',
}
# two groupby
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services