This is an automated email from the ASF dual-hosted git repository. msyavuz pushed a commit to branch msyavuz/fix/space-column-names in repository https://gitbox.apache.org/repos/asf/superset.git
commit 8392c804f2cc8f59f5b02473669827ee830f30f1 Author: Mehmet Salih Yavuz <[email protected]> AuthorDate: Tue Oct 7 14:29:52 2025 +0300 fix(charts): resolve column names with spaces breaking x-axis rendering --- superset/connectors/sqla/models.py | 48 ++++++++++++++++-- superset/db_engine_specs/mssql.py | 1 + superset/db_engine_specs/sqlite.py | 1 + superset/models/helpers.py | 9 ++++ tests/unit_tests/core_tests.py | 67 ++++++++++++++++++++++++++ tests/unit_tests/db_engine_specs/test_mssql.py | 17 +++++++ 6 files changed, 140 insertions(+), 3 deletions(-) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 9749d07675..9e87176890 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -905,7 +905,21 @@ class TableColumn(AuditMixinNullable, ImportExportMixin, CertificationMixin, Mod msg=msg, ) ) from ex - col = literal_column(expression, type_=type_) + # Quote the expression if it contains spaces to prevent SQL parsing issues + # For example, "Test column" should not be interpreted as "Test AS column" + if ( + expression + and " " in expression + and not any( + keyword in expression.upper() + for keyword in ["SELECT", "CASE", "WHEN", "AS", "FROM", "WHERE"] + ) + ): + # This looks like a column name with spaces, not a SQL expression + # Use column() which properly quotes identifiers + col = column(expression, type_=type_) + else: + col = literal_column(expression, type_=type_) else: col = column(self.column_name, type_=type_) col = self.database.make_sqla_column_compatible(col, label) @@ -952,7 +966,21 @@ class TableColumn(AuditMixinNullable, ImportExportMixin, CertificationMixin, Mod msg=msg, ) ) from ex - col = literal_column(expression, type_=type_) + # Quote the expression if it contains spaces to prevent SQL parsing issues + # For example, "Test column" should not be interpreted as "Test AS column" + if ( + expression + and " " in expression + and not any( + keyword in expression.upper() + for keyword in ["SELECT", "CASE", "WHEN", "AS", "FROM", "WHERE"] + ) + ): + # This looks like a column name with spaces, not a SQL expression + # Use column() which properly quotes identifiers + col = column(expression, type_=type_) + else: + col = literal_column(expression, type_=type_) else: col = column(self.column_name, type_=type_) time_expr = self.db_engine_spec.get_timestamp_expr(col, pdf, time_grain) @@ -1522,7 +1550,21 @@ class SqlaTable( is_dttm = col_in_metadata.is_temporal pdf = col_in_metadata.python_date_format else: - sqla_column = literal_column(expression) + # Quote the expression if it contains spaces to prevent SQL parsing issues + # For example, "Test column" should not be interpreted as "Test AS column" + if ( + expression + and " " in expression + and not any( + keyword in expression.upper() + for keyword in ["SELECT", "CASE", "WHEN", "AS", "FROM", "WHERE"] + ) + ): + # This looks like a column name with spaces, not a SQL expression + # Use column() which properly quotes identifiers + sqla_column = column(expression) + else: + sqla_column = literal_column(expression) if has_timegrain or force_type_check: try: # probe adhoc column type diff --git a/superset/db_engine_specs/mssql.py b/superset/db_engine_specs/mssql.py index 6e238e9ddf..5177f2cafb 100644 --- a/superset/db_engine_specs/mssql.py +++ b/superset/db_engine_specs/mssql.py @@ -52,6 +52,7 @@ CONNECTION_HOST_DOWN_REGEX = re.compile( class MssqlEngineSpec(BaseEngineSpec): engine = "mssql" engine_name = "Microsoft SQL Server" + force_column_alias_quotes = True max_column_name_length = 128 allows_cte_in_subquery = False supports_multivalues_insert = True diff --git a/superset/db_engine_specs/sqlite.py b/superset/db_engine_specs/sqlite.py index f8604bcfea..05d40e4a75 100644 --- a/superset/db_engine_specs/sqlite.py +++ b/superset/db_engine_specs/sqlite.py @@ -40,6 +40,7 @@ COLUMN_DOES_NOT_EXIST_REGEX = re.compile("no such column: (?P<column_name>.+)") class SqliteEngineSpec(BaseEngineSpec): engine = "sqlite" engine_name = "SQLite" + force_column_alias_quotes = True disable_ssh_tunneling = True supports_multivalues_insert = True diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 88230d7bc9..62b1ed6374 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -886,6 +886,15 @@ class ExploreMixin: # pylint: disable=too-many-public-methods be properly parsed and validated. """ if expression: + # Quote column names with spaces to prevent parsing issues + # For example, "Test column" should not be parsed as "Test AS column" + if " " in expression and not any( + keyword in expression.upper() + for keyword in ["SELECT", "CASE", "WHEN", "AS", "FROM", "WHERE"] + ): + # This looks like a column name with spaces, quote it + expression = f'"{expression}"' + expression = f"SELECT {expression}" if processed := self._process_sql_expression( diff --git a/tests/unit_tests/core_tests.py b/tests/unit_tests/core_tests.py index acd0501d82..fd4a2acdd7 100644 --- a/tests/unit_tests/core_tests.py +++ b/tests/unit_tests/core_tests.py @@ -145,6 +145,73 @@ def test_get_column_name_adhoc(): assert get_column_name(column) == "case when foo = 1 then 'foo' else 'bar' end" +def test_get_column_name_with_as_clause(): + # Test that we extract just the alias from expressions with AS clause + # This prevents double AS clauses when make_sqla_column_compatible adds .label() + + # MSSQL style with brackets + column = {"sqlExpression": "[Test Column] AS MyAlias"} + assert get_column_name(column) == "MyAlias" + + # Standard SQL with double quotes + column = {"sqlExpression": '"Test Column" AS MyAlias'} + assert get_column_name(column) == "MyAlias" + + # Simple column with AS + column = {"sqlExpression": "TestColumn AS MyAlias"} + assert get_column_name(column) == "MyAlias" + + # Column without AS clause should return the column name + column = {"sqlExpression": "[Test Column]"} + assert get_column_name(column) == "Test Column" + + # Simple column without AS + column = {"sqlExpression": "TestColumn"} + assert get_column_name(column) == "TestColumn" + + # Complex expression with AS + column = {"sqlExpression": "CASE WHEN foo = 1 THEN 'bar' END AS result"} + assert get_column_name(column) == "result" + + # If label is present, it takes precedence + column = {"label": "Custom Label", "sqlExpression": "TestColumn AS MyAlias"} + assert get_column_name(column) == "Custom Label" + + # Invalid SQL should fall back to returning the original expression + column = {"sqlExpression": "This is not valid SQL"} + assert get_column_name(column) == "This is not valid SQL" + + +def test_space_column_names_comprehensive(): + """Test comprehensive handling of column names with spaces across the system.""" + from sqlalchemy.sql.elements import quoted_name + + from superset.db_engine_specs.mssql import MssqlEngineSpec + from superset.db_engine_specs.sqlite import SqliteEngineSpec + + # Test engine specs have force_column_alias_quotes enabled + assert MssqlEngineSpec.force_column_alias_quotes is True + assert SqliteEngineSpec.force_column_alias_quotes is True + + # Test that labels with spaces get quoted + test_cases = [ + "Test Column", + "My Test Column", + "Column With Spaces", + ] + + for label in test_cases: + # MSSQL should use brackets + mssql_result = MssqlEngineSpec.make_label_compatible(label) + assert isinstance(mssql_result, quoted_name) + assert mssql_result.quote is True + + # SQLite should use double quotes + sqlite_result = SqliteEngineSpec.make_label_compatible(label) + assert isinstance(sqlite_result, quoted_name) + assert sqlite_result.quote is True + + def test_get_column_names(): assert get_column_names([STR_COLUMN, SQL_ADHOC_COLUMN]) == [ "my_column", diff --git a/tests/unit_tests/db_engine_specs/test_mssql.py b/tests/unit_tests/db_engine_specs/test_mssql.py index e0ce5e1180..e742763703 100644 --- a/tests/unit_tests/db_engine_specs/test_mssql.py +++ b/tests/unit_tests/db_engine_specs/test_mssql.py @@ -202,6 +202,23 @@ def test_column_datatype_to_string(original: TypeEngine, expected: str) -> None: assert actual == expected +def test_force_column_alias_quotes() -> None: + """Test that MSSQL properly quotes column aliases with spaces.""" + from sqlalchemy.sql.elements import quoted_name + + from superset.db_engine_specs.mssql import MssqlEngineSpec + + # Verify that force_column_alias_quotes is enabled + assert MssqlEngineSpec.force_column_alias_quotes is True + + # Test that labels with spaces get quoted + label = "Test Column" + result = MssqlEngineSpec.make_label_compatible(label) + assert isinstance(result, quoted_name) + assert result.quote is True + assert str(result) == "Test Column" + + @pytest.mark.parametrize( "original,expected", [
