aminghadersohi commented on code in PR #38170:
URL: https://github.com/apache/superset/pull/38170#discussion_r3361687984
##########
superset/commands/streaming_export/base.py:
##########
@@ -161,6 +204,11 @@ def _execute_query_and_stream(
start_time = time.time()
total_bytes = 0
+ # Get CSV export configuration
+ csv_export_config = app.config.get("CSV_EXPORT", {})
+ delimiter = csv_export_config.get("sep", ",")
Review Comment:
This only extracts `sep` and `decimal` from `CSV_EXPORT`. The non-streaming
path in `client_processing.py` passes the full dict via `**csv_export_config`
to `to_csv()`, so other keys (`quotechar`, `lineterminator`, `encoding`, etc.)
are honoured there but silently dropped here. Worth either aligning the two
paths or documenting which keys the streaming path supports.
##########
superset/commands/streaming_export/base.py:
##########
@@ -161,6 +204,11 @@ def _execute_query_and_stream(
start_time = time.time()
total_bytes = 0
+ # Get CSV export configuration
+ csv_export_config = app.config.get("CSV_EXPORT", {})
Review Comment:
`CSV_EXPORT` has an explicit default in `config.py` (`{"encoding":
"utf-8-sig"}`), so prefer `app.config["CSV_EXPORT"]` over `.get("CSV_EXPORT",
{})`. Using `.get()` with a hardcoded fallback silently masks misconfiguration
that removes the key.
##########
superset/charts/client_processing.py:
##########
@@ -389,7 +389,9 @@ def apply_client_processing( # noqa: C901
query["data"] = processed_df.to_dict()
elif query["result_format"] == ChartDataResultFormat.CSV:
buf = StringIO()
- processed_df.to_csv(buf, index=show_default_index)
+ # Apply CSV_EXPORT config for consistent CSV formatting
+ csv_export_config = current_app.config.get("CSV_EXPORT", {})
Review Comment:
Same as the streaming path: `CSV_EXPORT` has a default in `config.py`, so
`current_app.config["CSV_EXPORT"]` is preferred over `.get("CSV_EXPORT", {})`.
##########
tests/unit_tests/charts/test_client_processing.py:
##########
@@ -2788,3 +2788,114 @@ def
test_apply_client_processing_csv_format_default_na_behavior():
assert (
"Alice," in lines[2]
) # Second data row should have empty last_name (NA converted to null)
+
+
+@with_config({"CSV_EXPORT": {"sep": ";", "decimal": ","}})
+def test_apply_client_processing_csv_format_custom_separator():
Review Comment:
Missing `-> None` return type annotation. Applies to all new test functions
added in this file.
##########
tests/unit_tests/commands/sql_lab/streaming_export_command_test.py:
##########
@@ -577,3 +578,196 @@ def test_catalog_and_schema_passed_to_engine(mocker,
mock_query, mock_result_pro
catalog="my_catalog",
schema="my_schema",
)
+
+
+def test_csv_export_config_custom_separator(mocker, mock_query):
Review Comment:
Missing `-> None` return type annotation. Applies to all new test functions
added in this file.
##########
tests/unit_tests/commands/sql_lab/streaming_export_command_test.py:
##########
@@ -577,3 +578,196 @@ def test_catalog_and_schema_passed_to_engine(mocker,
mock_query, mock_result_pro
catalog="my_catalog",
schema="my_schema",
)
+
+
+def test_csv_export_config_custom_separator(mocker, mock_query):
+ """
+ Test that streaming CSV export respects CSV_EXPORT config
+ for custom separator (sep).
+
+ This is a regression test for GitHub issue #32371.
+ """
+ mock_query.select_sql = "SELECT * FROM test"
+
+ mock_result = MagicMock()
+ mock_result.keys.return_value = ["id", "name"]
+ mock_result.fetchmany.side_effect = [
+ [(1, "Alice"), (2, "Bob")],
+ [],
+ ]
+
+ mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
+
+ mock_connection = MagicMock()
+ mock_connection.execution_options.return_value.execute.return_value =
mock_result
+ mock_connection.__enter__.return_value = mock_connection
+ mock_connection.__exit__.return_value = None
+
+ mock_engine = MagicMock()
+ mock_engine.connect.return_value = mock_connection
+ mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
+ mock_engine
+ )
+
+ # Mock the app config to use semicolon separator
+ mocker.patch(
+ "superset.commands.streaming_export.base.app.config.get",
+ return_value={"sep": ";", "encoding": "utf-8"},
+ )
+
+ command = StreamingSqlResultExportCommand("test_client_123")
+ command.validate()
+
+ csv_generator_callable = command.run()
+ generator = csv_generator_callable()
+ csv_data = "".join(generator)
+
+ # With sep=";", columns should be separated by semicolon
+ assert "id;name" in csv_data
+ assert "1;Alice" in csv_data
+ assert "2;Bob" in csv_data
+
+
+def test_csv_export_config_custom_decimal(mocker, mock_query):
+ """
+ Test that streaming CSV export respects CSV_EXPORT config
+ for custom decimal separator.
+
+ This is a regression test for GitHub issue #32371.
+ """
+ mock_query.select_sql = "SELECT * FROM test"
+
+ mock_result = MagicMock()
+ mock_result.keys.return_value = ["id", "price"]
+ mock_result.fetchmany.side_effect = [
+ [(1, 12.34), (2, 56.78)],
+ [],
+ ]
+
+ mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
+
+ mock_connection = MagicMock()
+ mock_connection.execution_options.return_value.execute.return_value =
mock_result
+ mock_connection.__enter__.return_value = mock_connection
+ mock_connection.__exit__.return_value = None
+
+ mock_engine = MagicMock()
+ mock_engine.connect.return_value = mock_connection
+ mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
+ mock_engine
+ )
+
+ # Mock the app config to use comma as decimal separator
+ mocker.patch(
+ "superset.commands.streaming_export.base.app.config.get",
+ return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"},
+ )
+
+ command = StreamingSqlResultExportCommand("test_client_123")
+ command.validate()
+
+ csv_generator_callable = command.run()
+ generator = csv_generator_callable()
+ csv_data = "".join(generator)
+
+ # With decimal=",", float values should use comma
+ assert "12,34" in csv_data
+ assert "56,78" in csv_data
+
+
+def test_csv_export_config_combined_sep_and_decimal(mocker, mock_query):
+ """
+ Test that streaming CSV export respects both sep and decimal from
CSV_EXPORT.
+
+ This is a regression test for GitHub issue #32371.
+ """
+ mock_query.select_sql = "SELECT * FROM test"
+
+ mock_result = MagicMock()
+ mock_result.keys.return_value = ["id", "name", "price"]
+ mock_result.fetchmany.side_effect = [
+ [(1, "Widget", 99.99), (2, "Gadget", 149.50)],
+ [],
+ ]
+
+ mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)
+
+ mock_connection = MagicMock()
+ mock_connection.execution_options.return_value.execute.return_value =
mock_result
+ mock_connection.__enter__.return_value = mock_connection
+ mock_connection.__exit__.return_value = None
+
+ mock_engine = MagicMock()
+ mock_engine.connect.return_value = mock_connection
+ mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
+ mock_engine
+ )
+
+ # Mock the app config to use European format
+ mocker.patch(
+ "superset.commands.streaming_export.base.app.config.get",
+ return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"},
+ )
+
+ command = StreamingSqlResultExportCommand("test_client_123")
+ command.validate()
+
+ csv_generator_callable = command.run()
+ generator = csv_generator_callable()
+ csv_data = "".join(generator)
+
+ # Verify header uses semicolon separator
+ assert "id;name;price" in csv_data
+ # Verify data uses semicolon separator and comma decimal
+ assert "1;Widget;99,99" in csv_data
+ assert "2;Gadget;149,5" in csv_data or "2;Gadget;149,50" in csv_data
Review Comment:
`assert A or B` is valid here (Python can render `149.50` as `149.5`), but
`assert ";149,5" in csv_data` covers both representations and is simpler.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]