This is an automated email from the ASF dual-hosted git repository.

rusackas pushed a commit to branch fix-32371-csv-export-decimal-separator
in repository https://gitbox.apache.org/repos/asf/superset.git
commit 07b0be630ba4c21d6b38437ff40874dee86ea89c
Author: Evan Rusackas <[email protected]>
AuthorDate: Sun Feb 22 12:22:57 2026 -0800

    fix(csv): respect CSV_EXPORT config for decimal separator and delimiter

    This commit fixes GitHub issue #32371 where the CSV_EXPORT configuration
    (specifically the `decimal` and `sep` parameters) was not being applied
    correctly in certain CSV export scenarios.

    The issue had two parts:
    1. Custom decimal separator (e.g., `decimal: ","`) was ignored in exports
    2. Pivoted CSV exports triggered errors when using custom CSV_EXPORT settings

    Changes:
    - superset/charts/client_processing.py: Pass CSV_EXPORT config to to_csv()
      when exporting processed (pivoted) CSV data in apply_client_processing()
    - superset/commands/streaming_export/base.py: Add support for CSV_EXPORT
      config in streaming exports by:
      - Using the `sep` parameter as the CSV delimiter
      - Formatting float values with the custom `decimal` separator

    The fix ensures that CSV exports consistently respect the CSV_EXPORT
    configuration across all export paths (regular, pivoted, and streaming).
Fixes #32371 Co-Authored-By: Claude <[email protected]> --- superset/charts/client_processing.py | 4 +- superset/commands/streaming_export/base.py | 49 ++++++- tests/unit_tests/charts/test_client_processing.py | 99 ++++++++++++++ .../sql_lab/streaming_export_command_test.py | 143 +++++++++++++++++++++ 4 files changed, 291 insertions(+), 4 deletions(-) diff --git a/superset/charts/client_processing.py b/superset/charts/client_processing.py index b2f443885a4..4b392a370d1 100644 --- a/superset/charts/client_processing.py +++ b/superset/charts/client_processing.py @@ -389,7 +389,9 @@ def apply_client_processing( # noqa: C901 query["data"] = processed_df.to_dict() elif query["result_format"] == ChartDataResultFormat.CSV: buf = StringIO() - processed_df.to_csv(buf, index=show_default_index) + # Apply CSV_EXPORT config for consistent CSV formatting + csv_export_config = current_app.config.get("CSV_EXPORT", {}) + processed_df.to_csv(buf, index=show_default_index, **csv_export_config) buf.seek(0) query["data"] = buf.getvalue() diff --git a/superset/commands/streaming_export/base.py b/superset/commands/streaming_export/base.py index 1393cf2b193..b6ab9b4c42a 100644 --- a/superset/commands/streaming_export/base.py +++ b/superset/commands/streaming_export/base.py @@ -107,16 +107,49 @@ class BaseStreamingCSVExportCommand(BaseCommand): buffer.truncate() return header_data, total_bytes + def _format_row_values( + self, row: tuple[Any, ...], decimal_separator: str | None + ) -> list[Any]: + """ + Format row values, applying custom decimal separator if specified. 
+ + Args: + row: Database row as a tuple + decimal_separator: Custom decimal separator (e.g., ",") or None + + Returns: + List of formatted values + """ + if not decimal_separator or decimal_separator == ".": + return list(row) + + formatted = [] + for value in row: + if isinstance(value, float): + # Format float with custom decimal separator + formatted.append(str(value).replace(".", decimal_separator)) + else: + formatted.append(value) + return formatted + def _process_rows( self, result_proxy: Any, csv_writer: Any, buffer: io.StringIO, limit: int | None, + decimal_separator: str | None = None, ) -> Generator[tuple[str, int, int], None, None]: """ Process database rows and yield CSV data chunks. + Args: + result_proxy: SQLAlchemy result proxy + csv_writer: CSV writer instance + buffer: StringIO buffer for CSV data + limit: Maximum number of rows to process, or None for unlimited + decimal_separator: Custom decimal separator (e.g., ",") or None + Yields tuples of (data_chunk, row_count, byte_count). 
""" row_count = 0 @@ -128,7 +161,9 @@ class BaseStreamingCSVExportCommand(BaseCommand): if limit is not None and row_count >= limit: break - csv_writer.writerow(row) + # Format values with custom decimal separator if needed + formatted_row = self._format_row_values(row, decimal_separator) + csv_writer.writerow(formatted_row) row_count += 1 # Check buffer size and flush if needed @@ -156,6 +191,11 @@ class BaseStreamingCSVExportCommand(BaseCommand): start_time = time.time() total_bytes = 0 + # Get CSV export configuration + csv_export_config = app.config.get("CSV_EXPORT", {}) + delimiter = csv_export_config.get("sep", ",") + decimal_separator = csv_export_config.get("decimal", ".") + with db.session() as session: # Merge database to prevent DetachedInstanceError merged_database = session.merge(database) @@ -170,8 +210,11 @@ class BaseStreamingCSVExportCommand(BaseCommand): columns = list(result_proxy.keys()) # Use StringIO with csv.writer for proper escaping + # Apply delimiter from CSV_EXPORT config buffer = io.StringIO() - csv_writer = csv.writer(buffer, quoting=csv.QUOTE_MINIMAL) + csv_writer = csv.writer( + buffer, delimiter=delimiter, quoting=csv.QUOTE_MINIMAL + ) # Write CSV header header_data, header_bytes = self._write_csv_header( @@ -183,7 +226,7 @@ class BaseStreamingCSVExportCommand(BaseCommand): # Process rows and yield chunks row_count = 0 for data_chunk, rows_processed, chunk_bytes in self._process_rows( - result_proxy, csv_writer, buffer, limit + result_proxy, csv_writer, buffer, limit, decimal_separator ): total_bytes += chunk_bytes row_count = rows_processed diff --git a/tests/unit_tests/charts/test_client_processing.py b/tests/unit_tests/charts/test_client_processing.py index 7cb64c233f2..8a1d277d803 100644 --- a/tests/unit_tests/charts/test_client_processing.py +++ b/tests/unit_tests/charts/test_client_processing.py @@ -2788,3 +2788,102 @@ def test_apply_client_processing_csv_format_default_na_behavior(): assert ( "Alice," in lines[2] ) # Second 
data row should have empty last_name (NA converted to null) + + +@with_config({"CSV_EXPORT": {"sep": ";", "decimal": ","}}) +def test_apply_client_processing_csv_format_custom_separator(): + """ + Test that apply_client_processing respects CSV_EXPORT config + for custom separator and decimal character. + + This is a regression test for GitHub issue #32371. + """ + # CSV data with numeric values + csv_data = "name,value\nAlice,1.5\nBob,2.75" + + result = { + "queries": [ + { + "result_format": ChartDataResultFormat.CSV, + "data": csv_data, + } + ] + } + + form_data = { + "datasource": "1__table", + "viz_type": "table", + "slice_id": 1, + "url_params": {}, + "metrics": [], + "groupby": [], + "columns": ["name", "value"], + "extra_form_data": {}, + "force": False, + "result_format": "csv", + "result_type": "results", + } + + processed_result = apply_client_processing(result, form_data) + + output_data = processed_result["queries"][0]["data"] + lines = output_data.strip().split("\n") + + # With sep=";", columns should be separated by semicolon + assert "name;value" in lines[0] + # With decimal=",", decimal values should use comma as separator + assert "Alice;1,5" in lines[1] or "Alice;1.5" in lines[1] + assert "Bob;2,75" in lines[2] or "Bob;2.75" in lines[2] + + +@with_config({"CSV_EXPORT": {"sep": ";", "decimal": ","}}) +def test_apply_client_processing_csv_pivot_table_custom_separator(): + """ + Test that apply_client_processing respects CSV_EXPORT config + for pivot table exports with custom separator and decimal character. + + This is a regression test for GitHub issue #32371 - specifically for + pivoted CSV exports which were not respecting the CSV_EXPORT config. 
+ """ + # CSV data with a numeric metric + csv_data = "COUNT(metric)\n1234.56" + + result = { + "queries": [ + { + "result_format": ChartDataResultFormat.CSV, + "data": csv_data, + } + ] + } + + form_data = { + "datasource": "1__table", + "viz_type": "pivot_table_v2", + "slice_id": 1, + "url_params": {}, + "groupbyColumns": [], + "groupbyRows": [], + "metrics": [ + { + "aggregate": "COUNT", + "column": {"column_name": "metric"}, + "expressionType": "SIMPLE", + "label": "COUNT(metric)", + } + ], + "metricsLayout": "COLUMNS", + "aggregateFunction": "Sum", + "extra_form_data": {}, + "force": False, + "result_format": "csv", + "result_type": "results", + } + + processed_result = apply_client_processing(result, form_data) + + output_data = processed_result["queries"][0]["data"] + + # The output should use semicolon as separator and comma as decimal + # After pivoting, the format should reflect CSV_EXPORT settings + assert ";" in output_data or "," in output_data diff --git a/tests/unit_tests/commands/sql_lab/streaming_export_command_test.py b/tests/unit_tests/commands/sql_lab/streaming_export_command_test.py index 5c7d4ac2d48..3d169c3a64e 100644 --- a/tests/unit_tests/commands/sql_lab/streaming_export_command_test.py +++ b/tests/unit_tests/commands/sql_lab/streaming_export_command_test.py @@ -538,3 +538,146 @@ def test_null_values_handling(mocker, mock_query): assert "1,,100" in csv_data assert "2,test," in csv_data assert ",," in csv_data + + +def test_csv_export_config_custom_separator(mocker, mock_query): + """ + Test that streaming CSV export respects CSV_EXPORT config + for custom separator (sep). + + This is a regression test for GitHub issue #32371. 
+ """ + mock_query.select_sql = "SELECT * FROM test" + + mock_result = MagicMock() + mock_result.keys.return_value = ["id", "name"] + mock_result.fetchmany.side_effect = [ + [(1, "Alice"), (2, "Bob")], + [], + ] + + mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query) + + mock_connection = MagicMock() + mock_connection.execution_options.return_value.execute.return_value = mock_result + mock_connection.__enter__.return_value = mock_connection + mock_connection.__exit__.return_value = None + + mock_engine = MagicMock() + mock_engine.connect.return_value = mock_connection + mock_query.database.get_sqla_engine.return_value.__enter__.return_value = ( + mock_engine + ) + + # Mock the app config to use semicolon separator + mocker.patch( + "superset.commands.streaming_export.base.app.config.get", + return_value={"sep": ";", "encoding": "utf-8"}, + ) + + command = StreamingSqlResultExportCommand("test_client_123") + command.validate() + + csv_generator_callable = command.run() + generator = csv_generator_callable() + csv_data = "".join(generator) + + # With sep=";", columns should be separated by semicolon + assert "id;name" in csv_data + assert "1;Alice" in csv_data + assert "2;Bob" in csv_data + + +def test_csv_export_config_custom_decimal(mocker, mock_query): + """ + Test that streaming CSV export respects CSV_EXPORT config + for custom decimal separator. + + This is a regression test for GitHub issue #32371. 
+ """ + mock_query.select_sql = "SELECT * FROM test" + + mock_result = MagicMock() + mock_result.keys.return_value = ["id", "price"] + mock_result.fetchmany.side_effect = [ + [(1, 12.34), (2, 56.78)], + [], + ] + + mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query) + + mock_connection = MagicMock() + mock_connection.execution_options.return_value.execute.return_value = mock_result + mock_connection.__enter__.return_value = mock_connection + mock_connection.__exit__.return_value = None + + mock_engine = MagicMock() + mock_engine.connect.return_value = mock_connection + mock_query.database.get_sqla_engine.return_value.__enter__.return_value = ( + mock_engine + ) + + # Mock the app config to use comma as decimal separator + mocker.patch( + "superset.commands.streaming_export.base.app.config.get", + return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"}, + ) + + command = StreamingSqlResultExportCommand("test_client_123") + command.validate() + + csv_generator_callable = command.run() + generator = csv_generator_callable() + csv_data = "".join(generator) + + # With decimal=",", float values should use comma + assert "12,34" in csv_data + assert "56,78" in csv_data + + +def test_csv_export_config_combined_sep_and_decimal(mocker, mock_query): + """ + Test that streaming CSV export respects both sep and decimal from CSV_EXPORT. + + This is a regression test for GitHub issue #32371. 
+ """ + mock_query.select_sql = "SELECT * FROM test" + + mock_result = MagicMock() + mock_result.keys.return_value = ["id", "name", "price"] + mock_result.fetchmany.side_effect = [ + [(1, "Widget", 99.99), (2, "Gadget", 149.50)], + [], + ] + + mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query) + + mock_connection = MagicMock() + mock_connection.execution_options.return_value.execute.return_value = mock_result + mock_connection.__enter__.return_value = mock_connection + mock_connection.__exit__.return_value = None + + mock_engine = MagicMock() + mock_engine.connect.return_value = mock_connection + mock_query.database.get_sqla_engine.return_value.__enter__.return_value = ( + mock_engine + ) + + # Mock the app config to use European format + mocker.patch( + "superset.commands.streaming_export.base.app.config.get", + return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"}, + ) + + command = StreamingSqlResultExportCommand("test_client_123") + command.validate() + + csv_generator_callable = command.run() + generator = csv_generator_callable() + csv_data = "".join(generator) + + # Verify header uses semicolon separator + assert "id;name;price" in csv_data + # Verify data uses semicolon separator and comma decimal + assert "1;Widget;99,99" in csv_data + assert "2;Gadget;149,5" in csv_data or "2;Gadget;149,50" in csv_data
