This is an automated email from the ASF dual-hosted git repository.
yongjiezhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new b870a21eaa refactor: remove unused flatten function (#20582)
b870a21eaa is described below
commit b870a21eaa9359b4cf3bd4a23fe72c77387ab7dc
Author: Yongjie Zhao <[email protected]>
AuthorDate: Fri Jul 1 19:17:55 2022 +0800
refactor: remove unused flatten function (#20582)
---
.github/workflows/caches.js | 2 +
.../src/operators/pivotOperator.ts | 2 -
.../src/operators/timeComparePivotOperator.ts | 2 -
.../test/operators/pivotOperator.test.ts | 8 --
.../test/operators/timeCompareOperator.test.ts | 2 -
.../operators/timeComparePivotOperator.test.ts | 6 -
.../src/query/types/PostProcessing.ts | 2 -
.../test/query/types/PostProcessing.test.ts | 2 -
.../test/MixedTimeseries/buildQuery.test.ts | 8 --
superset/utils/pandas_postprocessing/__init__.py | 2 -
superset/utils/pandas_postprocessing/pivot.py | 13 ---
superset/utils/pandas_postprocessing/utils.py | 28 +----
.../pandas_postprocessing/test_compare.py | 2 -
tests/unit_tests/pandas_postprocessing/test_cum.py | 4 -
.../unit_tests/pandas_postprocessing/test_pivot.py | 129 ++++-----------------
.../pandas_postprocessing/test_resample.py | 2 -
.../pandas_postprocessing/test_rolling.py | 6 -
17 files changed, 28 insertions(+), 192 deletions(-)
diff --git a/.github/workflows/caches.js b/.github/workflows/caches.js
index 39d0ad2e6a..66fd7ee959 100644
--- a/.github/workflows/caches.js
+++ b/.github/workflows/caches.js
@@ -25,6 +25,8 @@ const assetsConfig = {
path: [`${workspaceDirectory}/superset/static/assets`],
hashFiles: [
`${workspaceDirectory}/superset-frontend/src/**/*`,
+ `${workspaceDirectory}/superset-frontend/packages/**/*`,
+ `${workspaceDirectory}/superset-frontend/plugins/**/*`,
`${workspaceDirectory}/superset-frontend/*.js`,
`${workspaceDirectory}/superset-frontend/*.json`,
],
diff --git
a/superset-frontend/packages/superset-ui-chart-controls/src/operators/pivotOperator.ts
b/superset-frontend/packages/superset-ui-chart-controls/src/operators/pivotOperator.ts
index f8e9f025b0..4d8c1f6d76 100644
---
a/superset-frontend/packages/superset-ui-chart-controls/src/operators/pivotOperator.ts
+++
b/superset-frontend/packages/superset-ui-chart-controls/src/operators/pivotOperator.ts
@@ -45,8 +45,6 @@ export const pivotOperator:
PostProcessingFactory<PostProcessingPivot> = (
metricLabels.map(metric => [metric, { operator: 'mean' }]),
),
drop_missing_columns: false,
- flatten_columns: false,
- reset_index: false,
},
};
}
diff --git
a/superset-frontend/packages/superset-ui-chart-controls/src/operators/timeComparePivotOperator.ts
b/superset-frontend/packages/superset-ui-chart-controls/src/operators/timeComparePivotOperator.ts
index 3851d62a36..b8543a1f4f 100644
---
a/superset-frontend/packages/superset-ui-chart-controls/src/operators/timeComparePivotOperator.ts
+++
b/superset-frontend/packages/superset-ui-chart-controls/src/operators/timeComparePivotOperator.ts
@@ -47,8 +47,6 @@ export const timeComparePivotOperator:
PostProcessingFactory<PostProcessingPivot
index,
columns: ensureIsArray(queryObject.columns).map(getColumnLabel),
drop_missing_columns: false,
- flatten_columns: false,
- reset_index: false,
aggregates,
},
};
diff --git
a/superset-frontend/packages/superset-ui-chart-controls/test/operators/pivotOperator.test.ts
b/superset-frontend/packages/superset-ui-chart-controls/test/operators/pivotOperator.test.ts
index 326b7edd85..edde99189a 100644
---
a/superset-frontend/packages/superset-ui-chart-controls/test/operators/pivotOperator.test.ts
+++
b/superset-frontend/packages/superset-ui-chart-controls/test/operators/pivotOperator.test.ts
@@ -80,8 +80,6 @@ test('pivot by __timestamp without groupby', () => {
'sum(val)': { operator: 'mean' },
},
drop_missing_columns: false,
- flatten_columns: false,
- reset_index: false,
},
});
});
@@ -103,8 +101,6 @@ test('pivot by __timestamp with groupby', () => {
'sum(val)': { operator: 'mean' },
},
drop_missing_columns: false,
- flatten_columns: false,
- reset_index: false,
},
});
});
@@ -131,8 +127,6 @@ test('pivot by x_axis with groupby', () => {
'sum(val)': { operator: 'mean' },
},
drop_missing_columns: false,
- flatten_columns: false,
- reset_index: false,
},
});
});
@@ -163,8 +157,6 @@ test('pivot by adhoc x_axis', () => {
'sum(val)': { operator: 'mean' },
},
drop_missing_columns: false,
- flatten_columns: false,
- reset_index: false,
},
});
});
diff --git
a/superset-frontend/packages/superset-ui-chart-controls/test/operators/timeCompareOperator.test.ts
b/superset-frontend/packages/superset-ui-chart-controls/test/operators/timeCompareOperator.test.ts
index a2e6b31380..90ab4038de 100644
---
a/superset-frontend/packages/superset-ui-chart-controls/test/operators/timeCompareOperator.test.ts
+++
b/superset-frontend/packages/superset-ui-chart-controls/test/operators/timeCompareOperator.test.ts
@@ -51,8 +51,6 @@ const queryObject: QueryObject = {
},
},
drop_missing_columns: false,
- flatten_columns: false,
- reset_index: false,
},
},
{
diff --git
a/superset-frontend/packages/superset-ui-chart-controls/test/operators/timeComparePivotOperator.test.ts
b/superset-frontend/packages/superset-ui-chart-controls/test/operators/timeComparePivotOperator.test.ts
index 4ce88cb9c1..27ddef32db 100644
---
a/superset-frontend/packages/superset-ui-chart-controls/test/operators/timeComparePivotOperator.test.ts
+++
b/superset-frontend/packages/superset-ui-chart-controls/test/operators/timeComparePivotOperator.test.ts
@@ -93,8 +93,6 @@ test('should pivot on any type of timeCompare', () => {
},
},
drop_missing_columns: false,
- flatten_columns: false,
- reset_index: false,
columns: ['foo', 'bar'],
index: ['__timestamp'],
},
@@ -133,8 +131,6 @@ test('should pivot on x-axis', () => {
drop_missing_columns: false,
columns: ['foo', 'bar'],
index: ['ds'],
- flatten_columns: false,
- reset_index: false,
},
});
});
@@ -174,8 +170,6 @@ test('should pivot on adhoc x-axis', () => {
drop_missing_columns: false,
columns: ['foo', 'bar'],
index: ['my_case_expr'],
- flatten_columns: false,
- reset_index: false,
},
});
});
diff --git
a/superset-frontend/packages/superset-ui-core/src/query/types/PostProcessing.ts
b/superset-frontend/packages/superset-ui-core/src/query/types/PostProcessing.ts
index 315cdb8456..375affa7e5 100644
---
a/superset-frontend/packages/superset-ui-core/src/query/types/PostProcessing.ts
+++
b/superset-frontend/packages/superset-ui-core/src/query/types/PostProcessing.ts
@@ -111,12 +111,10 @@ interface _PostProcessingPivot {
columns: string[];
combine_value_with_metric?: boolean;
drop_missing_columns?: boolean;
- flatten_columns?: boolean;
index: string[];
marginal_distribution_name?: string;
marginal_distributions?: boolean;
metric_fill_value?: any;
- reset_index?: boolean;
};
}
export type PostProcessingPivot = _PostProcessingPivot | DefaultPostProcessing;
diff --git
a/superset-frontend/packages/superset-ui-core/test/query/types/PostProcessing.test.ts
b/superset-frontend/packages/superset-ui-core/test/query/types/PostProcessing.test.ts
index 1d7d9f044e..9cdec11ec8 100644
---
a/superset-frontend/packages/superset-ui-core/test/query/types/PostProcessing.test.ts
+++
b/superset-frontend/packages/superset-ui-core/test/query/types/PostProcessing.test.ts
@@ -110,8 +110,6 @@ const PIVOT_RULE: PostProcessingPivot = {
index: ['foo'],
columns: ['bar'],
aggregates: AGGREGATES_OPTION,
- flatten_columns: true,
- reset_index: true,
},
};
diff --git
a/superset-frontend/plugins/plugin-chart-echarts/test/MixedTimeseries/buildQuery.test.ts
b/superset-frontend/plugins/plugin-chart-echarts/test/MixedTimeseries/buildQuery.test.ts
index 0b766c2dc4..4b39639c2d 100644
---
a/superset-frontend/plugins/plugin-chart-echarts/test/MixedTimeseries/buildQuery.test.ts
+++
b/superset-frontend/plugins/plugin-chart-echarts/test/MixedTimeseries/buildQuery.test.ts
@@ -125,9 +125,7 @@ test('should compile query object A', () => {
},
columns: ['foo'],
drop_missing_columns: false,
- flatten_columns: false,
index: ['__timestamp'],
- reset_index: false,
},
},
{
@@ -188,9 +186,7 @@ test('should compile query object B', () => {
},
columns: [],
drop_missing_columns: false,
- flatten_columns: false,
index: ['__timestamp'],
- reset_index: false,
},
},
{
@@ -314,9 +310,7 @@ test('should compile query objects with x-axis', () => {
},
columns: ['foo'],
drop_missing_columns: false,
- flatten_columns: false,
index: ['my_index'],
- reset_index: false,
},
},
{
@@ -354,9 +348,7 @@ test('should compile query objects with x-axis', () => {
},
columns: [],
drop_missing_columns: false,
- flatten_columns: false,
index: ['my_index'],
- reset_index: false,
},
},
{
diff --git a/superset/utils/pandas_postprocessing/__init__.py
b/superset/utils/pandas_postprocessing/__init__.py
index 9755df984c..7902cb3232 100644
--- a/superset/utils/pandas_postprocessing/__init__.py
+++ b/superset/utils/pandas_postprocessing/__init__.py
@@ -33,7 +33,6 @@ from superset.utils.pandas_postprocessing.resample import
resample
from superset.utils.pandas_postprocessing.rolling import rolling
from superset.utils.pandas_postprocessing.select import select
from superset.utils.pandas_postprocessing.sort import sort
-from superset.utils.pandas_postprocessing.utils import
_flatten_column_after_pivot
__all__ = [
"aggregate",
@@ -53,5 +52,4 @@ __all__ = [
"select",
"sort",
"flatten",
- "_flatten_column_after_pivot",
]
diff --git a/superset/utils/pandas_postprocessing/pivot.py
b/superset/utils/pandas_postprocessing/pivot.py
index 89a187ce89..e18d58e28e 100644
--- a/superset/utils/pandas_postprocessing/pivot.py
+++ b/superset/utils/pandas_postprocessing/pivot.py
@@ -22,7 +22,6 @@ from pandas import DataFrame
from superset.constants import NULL_STRING, PandasAxis
from superset.exceptions import InvalidPostProcessingError
from superset.utils.pandas_postprocessing.utils import (
- _flatten_column_after_pivot,
_get_aggregate_funcs,
validate_column_args,
)
@@ -40,8 +39,6 @@ def pivot( # pylint:
disable=too-many-arguments,too-many-locals
combine_value_with_metric: bool = False,
marginal_distributions: Optional[bool] = None,
marginal_distribution_name: Optional[str] = None,
- flatten_columns: bool = True,
- reset_index: bool = True,
) -> DataFrame:
"""
Perform a pivot operation on a DataFrame.
@@ -61,8 +58,6 @@ def pivot( # pylint:
disable=too-many-arguments,too-many-locals
:param marginal_distributions: Add totals for row/column. Default to False
:param marginal_distribution_name: Name of row/column with marginal
distribution.
Default to 'All'.
- :param flatten_columns: Convert column names to strings
- :param reset_index: Convert index to column
:return: A pivot table
:raises InvalidPostProcessingError: If the request in incorrect
"""
@@ -114,12 +109,4 @@ def pivot( # pylint:
disable=too-many-arguments,too-many-locals
if combine_value_with_metric:
df = df.stack(0).unstack()
- # Make index regular column
- if flatten_columns:
- df.columns = [
- _flatten_column_after_pivot(col, aggregates) for col in df.columns
- ]
- # return index as regular column
- if reset_index:
- df.reset_index(level=0, inplace=True)
return df
diff --git a/superset/utils/pandas_postprocessing/utils.py
b/superset/utils/pandas_postprocessing/utils.py
index ab39135918..3d14f643c5 100644
--- a/superset/utils/pandas_postprocessing/utils.py
+++ b/superset/utils/pandas_postprocessing/utils.py
@@ -15,12 +15,12 @@
# specific language governing permissions and limitations
# under the License.
from functools import partial
-from typing import Any, Callable, Dict, Tuple, Union
+from typing import Any, Callable, Dict
import numpy as np
import pandas as pd
from flask_babel import gettext as _
-from pandas import DataFrame, NamedAgg, Timestamp
+from pandas import DataFrame, NamedAgg
from superset.exceptions import InvalidPostProcessingError
@@ -97,30 +97,6 @@ RESAMPLE_METHOD = ("asfreq", "bfill", "ffill", "linear",
"median", "mean", "sum"
FLAT_COLUMN_SEPARATOR = ", "
-def _flatten_column_after_pivot(
- column: Union[float, Timestamp, str, Tuple[str, ...]],
- aggregates: Dict[str, Dict[str, Any]],
-) -> str:
- """
- Function for flattening column names into a single string. This step is
necessary
- to be able to properly serialize a DataFrame. If the column is a string,
return
- element unchanged. For multi-element columns, join column elements with a
comma,
- with the exception of pivots made with a single aggregate, in which case
the
- aggregate column name is omitted.
-
- :param column: single element from `DataFrame.columns`
- :param aggregates: aggregates
- :return:
- """
- if not isinstance(column, tuple):
- column = (column,)
- if len(aggregates) == 1 and len(column) > 1:
- # drop aggregate for single aggregate pivots with multiple groupings
- # from column name (aggregates always come first in column name)
- column = column[1:]
- return FLAT_COLUMN_SEPARATOR.join([str(col) for col in column])
-
-
def _is_multi_index_on_columns(df: DataFrame) -> bool:
return isinstance(df.columns, pd.MultiIndex)
diff --git a/tests/unit_tests/pandas_postprocessing/test_compare.py
b/tests/unit_tests/pandas_postprocessing/test_compare.py
index 4f742bae16..9da8a31535 100644
--- a/tests/unit_tests/pandas_postprocessing/test_compare.py
+++ b/tests/unit_tests/pandas_postprocessing/test_compare.py
@@ -188,8 +188,6 @@ def test_compare_after_pivot():
"sum_metric": {"operator": "sum"},
"count_metric": {"operator": "sum"},
},
- flatten_columns=False,
- reset_index=False,
)
"""
count_metric sum_metric
diff --git a/tests/unit_tests/pandas_postprocessing/test_cum.py
b/tests/unit_tests/pandas_postprocessing/test_cum.py
index 17cd3c0efc..130e060252 100644
--- a/tests/unit_tests/pandas_postprocessing/test_cum.py
+++ b/tests/unit_tests/pandas_postprocessing/test_cum.py
@@ -83,8 +83,6 @@ def test_cum_after_pivot_with_single_metric():
index=["dttm"],
columns=["country"],
aggregates={"sum_metric": {"operator": "sum"}},
- flatten_columns=False,
- reset_index=False,
)
"""
sum_metric
@@ -127,8 +125,6 @@ def test_cum_after_pivot_with_multiple_metrics():
"sum_metric": {"operator": "sum"},
"count_metric": {"operator": "sum"},
},
- flatten_columns=False,
- reset_index=False,
)
"""
count_metric sum_metric
diff --git a/tests/unit_tests/pandas_postprocessing/test_pivot.py
b/tests/unit_tests/pandas_postprocessing/test_pivot.py
index 658cb4edcd..8efd203906 100644
--- a/tests/unit_tests/pandas_postprocessing/test_pivot.py
+++ b/tests/unit_tests/pandas_postprocessing/test_pivot.py
@@ -17,86 +17,12 @@
import numpy as np
import pytest
-from pandas import DataFrame, Timestamp, to_datetime
+from pandas import DataFrame, to_datetime
from superset.exceptions import InvalidPostProcessingError
-from superset.utils.pandas_postprocessing import _flatten_column_after_pivot,
pivot
-from tests.unit_tests.fixtures.dataframes import categories_df,
single_metric_df
-from tests.unit_tests.pandas_postprocessing.utils import (
- AGGREGATES_MULTIPLE,
- AGGREGATES_SINGLE,
-)
-
-
-def test_flatten_column_after_pivot():
- """
- Test pivot column flattening function
- """
- # single aggregate cases
- assert (
- _flatten_column_after_pivot(
- aggregates=AGGREGATES_SINGLE,
- column="idx_nulls",
- )
- == "idx_nulls"
- )
-
- assert (
- _flatten_column_after_pivot(
- aggregates=AGGREGATES_SINGLE,
- column=1234,
- )
- == "1234"
- )
-
- assert (
- _flatten_column_after_pivot(
- aggregates=AGGREGATES_SINGLE,
- column=Timestamp("2020-09-29T00:00:00"),
- )
- == "2020-09-29 00:00:00"
- )
-
- assert (
- _flatten_column_after_pivot(
- aggregates=AGGREGATES_SINGLE,
- column="idx_nulls",
- )
- == "idx_nulls"
- )
-
- assert (
- _flatten_column_after_pivot(
- aggregates=AGGREGATES_SINGLE,
- column=("idx_nulls", "col1"),
- )
- == "col1"
- )
-
- assert (
- _flatten_column_after_pivot(
- aggregates=AGGREGATES_SINGLE,
- column=("idx_nulls", "col1", 1234),
- )
- == "col1, 1234"
- )
-
- # Multiple aggregate cases
- assert (
- _flatten_column_after_pivot(
- aggregates=AGGREGATES_MULTIPLE,
- column=("idx_nulls", "asc_idx", "col1"),
- )
- == "idx_nulls, asc_idx, col1"
- )
-
- assert (
- _flatten_column_after_pivot(
- aggregates=AGGREGATES_MULTIPLE,
- column=("idx_nulls", "asc_idx", "col1", 1234),
- )
- == "idx_nulls, asc_idx, col1, 1234"
- )
+from superset.utils.pandas_postprocessing import flatten, pivot
+from tests.unit_tests.fixtures.dataframes import categories_df
+from tests.unit_tests.pandas_postprocessing.utils import AGGREGATES_SINGLE
def test_pivot_without_columns():
@@ -108,9 +34,9 @@ def test_pivot_without_columns():
index=["name"],
aggregates=AGGREGATES_SINGLE,
)
- assert df.columns.tolist() == ["name", "idx_nulls"]
+ assert df.columns.tolist() == ["idx_nulls"]
assert len(df) == 101
- assert df.sum()[1] == 1050
+ assert df["idx_nulls"].sum() == 1050
def test_pivot_with_single_column():
@@ -123,9 +49,13 @@ def test_pivot_with_single_column():
columns=["category"],
aggregates=AGGREGATES_SINGLE,
)
- assert df.columns.tolist() == ["name", "cat0", "cat1", "cat2"]
+ assert df.columns.tolist() == [
+ ("idx_nulls", "cat0"),
+ ("idx_nulls", "cat1"),
+ ("idx_nulls", "cat2"),
+ ]
assert len(df) == 101
- assert df.sum()[1] == 315
+ assert df["idx_nulls"]["cat0"].sum() == 315
df = pivot(
df=categories_df,
@@ -133,7 +63,11 @@ def test_pivot_with_single_column():
columns=["category"],
aggregates=AGGREGATES_SINGLE,
)
- assert df.columns.tolist() == ["dept", "cat0", "cat1", "cat2"]
+ assert df.columns.tolist() == [
+ ("idx_nulls", "cat0"),
+ ("idx_nulls", "cat1"),
+ ("idx_nulls", "cat2"),
+ ]
assert len(df) == 5
@@ -147,6 +81,7 @@ def test_pivot_with_multiple_columns():
columns=["category", "dept"],
aggregates=AGGREGATES_SINGLE,
)
+ df = flatten(df)
assert len(df.columns) == 1 + 3 * 5 # index + possible permutations
@@ -161,7 +96,7 @@ def test_pivot_fill_values():
metric_fill_value=1,
aggregates={"idx_nulls": {"operator": "sum"}},
)
- assert df.sum()[1] == 382
+ assert df["idx_nulls"]["cat0"].sum() == 382
def test_pivot_fill_column_values():
@@ -177,7 +112,7 @@ def test_pivot_fill_column_values():
aggregates={"idx_nulls": {"operator": "sum"}},
)
assert len(df) == 101
- assert df.columns.tolist() == ["name", "<NULL>"]
+ assert df.columns.tolist() == [("idx_nulls", "<NULL>")]
def test_pivot_exceptions():
@@ -234,8 +169,9 @@ def test_pivot_eliminate_cartesian_product_columns():
aggregates={"metric": {"operator": "mean"}},
drop_missing_columns=False,
)
- assert list(df.columns) == ["dttm", "0, 0", "1, 1"]
- assert np.isnan(df["1, 1"][0])
+ df = flatten(df)
+ assert list(df.columns) == ["dttm", "metric, 0, 0", "metric, 1, 1"]
+ assert np.isnan(df["metric, 1, 1"][0])
# multiple metrics
mock_df = DataFrame(
@@ -258,6 +194,7 @@ def test_pivot_eliminate_cartesian_product_columns():
},
drop_missing_columns=False,
)
+ df = flatten(df)
assert list(df.columns) == [
"dttm",
"metric, 0, 0",
@@ -266,21 +203,3 @@ def test_pivot_eliminate_cartesian_product_columns():
"metric2, 1, 1",
]
assert np.isnan(df["metric, 1, 1"][0])
-
-
-def test_pivot_without_flatten_columns_and_reset_index():
- df = pivot(
- df=single_metric_df,
- index=["dttm"],
- columns=["country"],
- aggregates={"sum_metric": {"operator": "sum"}},
- flatten_columns=False,
- reset_index=False,
- )
- # metric
- # country UK US
- # dttm
- # 2019-01-01 5 6
- # 2019-01-02 7 8
- assert df.columns.to_list() == [("sum_metric", "UK"), ("sum_metric", "US")]
- assert df.index.to_list() == to_datetime(["2019-01-01",
"2019-01-02"]).to_list()
diff --git a/tests/unit_tests/pandas_postprocessing/test_resample.py
b/tests/unit_tests/pandas_postprocessing/test_resample.py
index 9f1aaef3e6..b1414c5fe8 100644
--- a/tests/unit_tests/pandas_postprocessing/test_resample.py
+++ b/tests/unit_tests/pandas_postprocessing/test_resample.py
@@ -110,8 +110,6 @@ def test_resample_after_pivot():
aggregates={
"val": {"operator": "sum"},
},
- flatten_columns=False,
- reset_index=False,
)
"""
val
diff --git a/tests/unit_tests/pandas_postprocessing/test_rolling.py
b/tests/unit_tests/pandas_postprocessing/test_rolling.py
index 4d4c4341b8..b72a8bee44 100644
--- a/tests/unit_tests/pandas_postprocessing/test_rolling.py
+++ b/tests/unit_tests/pandas_postprocessing/test_rolling.py
@@ -113,8 +113,6 @@ def test_rolling_should_empty_df():
index=["dttm"],
columns=["country"],
aggregates={"sum_metric": {"operator": "sum"}},
- flatten_columns=False,
- reset_index=False,
)
rolling_df = pp.rolling(
df=pivot_df,
@@ -132,8 +130,6 @@ def test_rolling_after_pivot_with_single_metric():
index=["dttm"],
columns=["country"],
aggregates={"sum_metric": {"operator": "sum"}},
- flatten_columns=False,
- reset_index=False,
)
"""
sum_metric
@@ -182,8 +178,6 @@ def test_rolling_after_pivot_with_multiple_metrics():
"sum_metric": {"operator": "sum"},
"count_metric": {"operator": "sum"},
},
- flatten_columns=False,
- reset_index=False,
)
"""
count_metric sum_metric