villebro commented on code in PR #21163:
URL: https://github.com/apache/superset/pull/21163#discussion_r957482083
##########
tests/integration_tests/query_context_tests.py:
##########
@@ -728,3 +730,183 @@ def test_get_label_map(app_context, virtual_dataset_comma_in_column_value):
"count, col2, row2": ["count", "col2, row2"],
"count, col2, row3": ["count", "col2, row3"],
}
+
+
+def test_time_column_with_time_grain(app_context, physical_dataset):
+ column_on_axis: AdhocColumn = {
+ "label": "I_AM_A_ORIGINAL_COLUMN",
Review Comment:
nit (don't bother changing this unless you happen to have the file open in
your IDE 😆):
```suggestion
"label": "I_AM_AN_ORIGINAL_COLUMN",
```
##########
superset/superset_typing.py:
##########
@@ -55,6 +55,8 @@ class AdhocColumn(TypedDict, total=False):
hasCustomLabel: Optional[bool]
label: Optional[str]
sqlExpression: Optional[str]
+ is_axis: Optional[bool]
+ time_grain: Optional[str]
Review Comment:
nit: these are snake_case, while the old ones are camelCase. It's a matter
of taste, of course, but I kinda prefer using camelCase, as these kinds of
properties originate from the TS codebase.
##########
tests/integration_tests/query_context_tests.py:
##########
@@ -728,3 +730,183 @@ def test_get_label_map(app_context, virtual_dataset_comma_in_column_value):
"count, col2, row2": ["count", "col2, row2"],
"count, col2, row3": ["count", "col2, row3"],
}
+
+
+def test_time_column_with_time_grain(app_context, physical_dataset):
+ column_on_axis: AdhocColumn = {
+ "label": "I_AM_A_ORIGINAL_COLUMN",
+ "sqlExpression": "col5",
+ "time_grain": "P1Y",
+ }
+ adhoc_column: AdhocColumn = {
+ "label": "I_AM_A_TRUNC_COLUMN",
+ "sqlExpression": "col6",
+ "is_axis": True,
+ "time_grain": "P1Y",
+ }
+ qc = QueryContextFactory().create(
+ datasource={
+ "type": physical_dataset.type,
+ "id": physical_dataset.id,
+ },
+ queries=[
+ {
+ "columns": ["col1", column_on_axis, adhoc_column],
+ "metrics": ["count"],
+ "orderby": [["col1", True]],
+ }
+ ],
+ result_type=ChartDataResultType.FULL,
+ force=True,
+ )
+ query_object = qc.queries[0]
+ df = qc.get_df_payload(query_object)["df"]
+ if query_object.datasource.database.backend == "sqlite":
+ # sqlite returns string as timestamp column
+ assert df["I_AM_A_ORIGINAL_COLUMN"][0] == "2000-01-01 00:00:00"
+ assert df["I_AM_A_ORIGINAL_COLUMN"][1] == "2000-01-02 00:00:00"
+ assert df["I_AM_A_TRUNC_COLUMN"][0] == "2002-01-01 00:00:00"
+ assert df["I_AM_A_TRUNC_COLUMN"][1] == "2002-01-01 00:00:00"
+ else:
+        assert df["I_AM_A_ORIGINAL_COLUMN"][0].strftime("%Y-%m-%d") == "2000-01-01"
+        assert df["I_AM_A_ORIGINAL_COLUMN"][1].strftime("%Y-%m-%d") == "2000-01-02"
+        assert df["I_AM_A_TRUNC_COLUMN"][0].strftime("%Y-%m-%d") == "2002-01-01"
+        assert df["I_AM_A_TRUNC_COLUMN"][1].strftime("%Y-%m-%d") == "2002-01-01"
+
+
+def test_non_time_column_with_time_grain(app_context, physical_dataset):
+ qc = QueryContextFactory().create(
+ datasource={
+ "type": physical_dataset.type,
+ "id": physical_dataset.id,
+ },
+ queries=[
+ {
+ "columns": [
+ "col1",
+ {
+ "label": "COL2 ALIAS",
+ "sqlExpression": "col2",
+ "is_axis": True,
+ "time_grain": "P1Y",
+ },
+ ],
+ "metrics": ["count"],
+ "orderby": [["col1", True]],
+ "row_limit": 1,
+ }
+ ],
+ result_type=ChartDataResultType.FULL,
+ force=True,
+ )
+
+ query_object = qc.queries[0]
+ df = qc.get_df_payload(query_object)["df"]
+ assert df["COL2 ALIAS"][0] == "a"
+
+
+def test_special_chars_in_column_name(app_context, physical_dataset):
+ qc = QueryContextFactory().create(
+ datasource={
+ "type": physical_dataset.type,
+ "id": physical_dataset.id,
+ },
+ queries=[
+ {
+ "columns": [
+ "col1",
+ "time column with spaces",
+ {
+ "label": "I_AM_A_TRUNC_COLUMN",
+ "sqlExpression": "time column with spaces",
+ "is_axis": True,
+ "time_grain": "P1Y",
+ },
+ ],
+ "metrics": ["count"],
+ "orderby": [["col1", True]],
+ "row_limit": 1,
+ }
+ ],
+ result_type=ChartDataResultType.FULL,
+ force=True,
+ )
+
+ query_object = qc.queries[0]
+ df = qc.get_df_payload(query_object)["df"]
+ if query_object.datasource.database.backend == "sqlite":
+ # sqlite returns string as timestamp column
+ assert df["time column with spaces"][0] == "2002-01-03 00:00:00"
+ assert df["I_AM_A_TRUNC_COLUMN"][0] == "2002-01-01 00:00:00"
+ else:
+        assert df["time column with spaces"][0].strftime("%Y-%m-%d") == "2002-01-03"
+        assert df["I_AM_A_TRUNC_COLUMN"][0].strftime("%Y-%m-%d") == "2002-01-01"
+
+
+@only_postgresql
Review Comment:
I love this!
##########
superset/connectors/sqla/utils.py:
##########
@@ -147,6 +149,24 @@ def get_virtual_table_metadata(dataset: "SqlaTable") -> List[ResultSetColumnType]:
return cols
+def get_columns_description(
+ database: Database,
+ query: str,
+) -> List[ResultSetColumnType]:
+ db_engine_spec = database.db_engine_spec
+ try:
+ with closing(database.get_sqla_engine().raw_connection()) as conn:
+ cursor = conn.cursor()
+ query = database.apply_limit_to_sql(query, limit=1)
+ cursor.execute(query)
+ db_engine_spec.execute(cursor, query)
+ result = db_engine_spec.fetch_data(cursor, limit=1)
+        result_set = SupersetResultSet(result, cursor.description, db_engine_spec)
+ return result_set.columns
+ except Exception as ex:
+ raise SupersetGenericDBErrorException(message=str(ex)) from ex
Review Comment:
I've been thinking about this for quite some time, and one way of doing this
would be adding a new endpoint to the dataset API, like `[GET]
/api/v1/dataset/{pk}/coltype`, that you could send any arbitrary expression to,
and it would return the column metadata for it (the `is_dttm` etc that we now
get back in the description). Then we could add a button to the adhoc control
that makes it possible to fetch the metadata, and if it's temporal, it would
expose the time grain dropdown in the popover.
This would make it possible to have different time grains for different
columns, and reuse the same logic here (when we still don't have the frontend
designs, we could just call the relevant logic from the backend without an API
call).
##########
superset/connectors/sqla/utils.py:
##########
@@ -184,12 +204,12 @@ def validate_adhoc_subquery(
@memoized
def get_dialect_name(drivername: str) -> str:
- return SqlaURL(drivername).get_dialect().name
+ return SqlaURL.create(drivername).get_dialect().name
Review Comment:
nice catch
##########
superset/connectors/sqla/models.py:
##########
@@ -1124,7 +1125,25 @@ def adhoc_column_to_sqla(
schema=self.schema,
template_processor=template_processor,
)
- sqla_column = literal_column(expression)
+ col_in_metadata = self.get_column(expression)
+ if col_in_metadata:
+ sqla_column = col_in_metadata.get_sqla_col()
+ is_dttm = col_in_metadata.is_temporal
+ else:
+ sqla_column = literal_column(expression)
+ # probe adhoc column type
+ tbl, _ = self.get_from_clause(template_processor)
+ qry = sa.select([sqla_column]).limit(1).select_from(tbl)
+ sql = self.database.compile_sqla_query(qry)
+ col_desc = get_columns_description(self.database, sql)
+ is_dttm = col_desc[0]["is_dttm"]
+
+ if col.get("is_axis") and col.get("time_grain") and is_dttm:
+ sqla_column = self.db_engine_spec.get_timestamp_expr(
+ sqla_column,
+ None,
+ col.get("time_grain"),
+ )
Review Comment:
I haven't thought quite this far yet, but is there a reason we want to have
`is_axis` here in addition to having `time_grain`? I would assume the frontend
simply omits `time_grain` if it doesn't want to apply a time grain. Also, a mini
nit as we're now on 3.8+ (note the assignment expression needs parentheses when
used as an operand of `and`):
```suggestion
        if col.get("is_axis") and (time_grain := col.get("time_grain")) and is_dttm:
            sqla_column = self.db_engine_spec.get_timestamp_expr(
                sqla_column,
                None,
                time_grain,
            )
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]