Copilot commented on code in PR #38698:
URL: https://github.com/apache/superset/pull/38698#discussion_r2955019182
##########
superset/config.py:
##########
@@ -2057,6 +2057,9 @@ def EMAIL_HEADER_MUTATOR( # pylint:
disable=invalid-name,unused-argument # noq
SQL_VALIDATORS_BY_ENGINE = {
"presto": "PrestoDBSQLValidator",
"postgresql": "PostgreSQLValidator",
+ # "sqlite": "SQLiteSQLValidator",
+ # "gsheets": "SQLiteSQLValidator",
Review Comment:
These engine→validator mappings are commented out, so the new SQLite
validator won’t actually be used in SQL Lab by default (and the validate_sql
endpoint will still report “no SQL validator is configured” for
sqlite/gsheets/shillelagh). If the intent is to ship this feature enabled,
these should be active entries; if the intent is opt-in, it would help to
clarify that in the PR description and/or add a brief comment explaining how to
enable it safely (eg requiring the `apache-superset[sqlite]` extra).
##########
tests/unit_tests/sql_validators/sqlite_test.py:
##########
@@ -0,0 +1,270 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from subprocess import CompletedProcess
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from superset.sql_validators.sqlite import SQLiteSQLValidator
+
+
+def _mock_result(
+ returncode: int,
+ stderr: str = "",
+ stdout: str = "",
+) -> CompletedProcess[str]:
+ return CompletedProcess(
+ args=[],
+ returncode=returncode,
+ stdout=stdout,
+ stderr=stderr,
+ )
+
+
+def test_valid_syntax() -> None:
+ mock_database = MagicMock()
+ sql = "SELECT 1, col FROM my_table"
+
+ with (
+ patch(
+ "superset.sql_validators.sqlite.get_binary_path",
+ return_value="syntaqlite",
+ ),
+ patch(
+ "superset.sql_validators.sqlite.subprocess.run",
+ return_value=_mock_result(returncode=0),
+ ) as run,
+ ):
+ annotations = SQLiteSQLValidator.validate(
+ sql=sql,
+ catalog=None,
+ schema="",
+ database=mock_database,
+ )
+
+ assert annotations == []
+ run.assert_called_once()
+ command = run.call_args.args[0]
+ assert "--input" not in command
+ assert "-e" not in command
+ assert run.call_args.kwargs["input"] == sql
+
+
+def test_invalid_syntax_single_error() -> None:
+ mock_database = MagicMock()
+ stderr = (
+ 'error: near "SELEC": syntax error\n'
+ " --> <input>:1:1\n"
+ " |\n"
+ "1 | SELEC * FROM foo\n"
+ " | ^~~~~\n"
+ )
+
+ with (
+ patch(
+ "superset.sql_validators.sqlite.get_binary_path",
return_value="syntaqlite"
+ ),
+ patch(
+ "superset.sql_validators.sqlite.subprocess.run",
+ return_value=_mock_result(returncode=1, stderr=stderr),
+ ),
+ ):
+ annotations = SQLiteSQLValidator.validate(
+ sql="SELEC * FROM foo",
+ catalog=None,
+ schema="",
+ database=mock_database,
+ )
+
+ assert len(annotations) == 1
+ annotation = annotations[0]
+ assert annotation.line_number == 1
+ assert annotation.start_column == 1
+ assert "SELEC" in annotation.message
+
Review Comment:
The tests validate `line_number` and `start_column`, but don’t assert
`end_column`. Since the validator is intended to provide precise spans, adding
an `end_column` assertion (especially for the caret+tilde width calculation)
would prevent regressions and would have caught the current off-by-one issue.
##########
tests/unit_tests/sql_validators/sqlite_test.py:
##########
@@ -0,0 +1,270 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from subprocess import CompletedProcess
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from superset.sql_validators.sqlite import SQLiteSQLValidator
+
+
+def _mock_result(
+ returncode: int,
+ stderr: str = "",
+ stdout: str = "",
+) -> CompletedProcess[str]:
+ return CompletedProcess(
+ args=[],
+ returncode=returncode,
+ stdout=stdout,
+ stderr=stderr,
+ )
+
+
+def test_valid_syntax() -> None:
+ mock_database = MagicMock()
+ sql = "SELECT 1, col FROM my_table"
+
+ with (
+ patch(
+ "superset.sql_validators.sqlite.get_binary_path",
+ return_value="syntaqlite",
+ ),
+ patch(
+ "superset.sql_validators.sqlite.subprocess.run",
+ return_value=_mock_result(returncode=0),
+ ) as run,
+ ):
+ annotations = SQLiteSQLValidator.validate(
+ sql=sql,
+ catalog=None,
+ schema="",
+ database=mock_database,
+ )
+
+ assert annotations == []
+ run.assert_called_once()
+ command = run.call_args.args[0]
+ assert "--input" not in command
+ assert "-e" not in command
+ assert run.call_args.kwargs["input"] == sql
+
+
+def test_invalid_syntax_single_error() -> None:
+ mock_database = MagicMock()
+ stderr = (
+ 'error: near "SELEC": syntax error\n'
+ " --> <input>:1:1\n"
+ " |\n"
+ "1 | SELEC * FROM foo\n"
+ " | ^~~~~\n"
+ )
+
+ with (
+ patch(
+ "superset.sql_validators.sqlite.get_binary_path",
return_value="syntaqlite"
+ ),
+ patch(
+ "superset.sql_validators.sqlite.subprocess.run",
+ return_value=_mock_result(returncode=1, stderr=stderr),
+ ),
+ ):
+ annotations = SQLiteSQLValidator.validate(
+ sql="SELEC * FROM foo",
+ catalog=None,
+ schema="",
+ database=mock_database,
+ )
+
+ assert len(annotations) == 1
+ annotation = annotations[0]
+ assert annotation.line_number == 1
+ assert annotation.start_column == 1
+ assert "SELEC" in annotation.message
+
+
+def test_invalid_syntax_multiple_errors() -> None:
+ mock_database = MagicMock()
+ stderr = (
+ 'error: near "SELEC": syntax error\n'
+ " --> <input>:1:1\n"
+ " |\n"
+ "1 | SELEC * FROM foo; SELEC * FROM bar\n"
+ " | ^~~~~\n\n"
+ 'error: near "SELEC": syntax error\n'
+ " --> <input>:1:20\n"
+ " |\n"
+ "1 | SELEC * FROM foo; SELEC * FROM bar\n"
+ " | ^~~~~\n"
+ )
+
+ with (
+ patch(
+ "superset.sql_validators.sqlite.get_binary_path",
return_value="syntaqlite"
+ ),
+ patch(
+ "superset.sql_validators.sqlite.subprocess.run",
+ return_value=_mock_result(returncode=1, stderr=stderr),
+ ),
+ ):
+ annotations = SQLiteSQLValidator.validate(
+ sql="SELEC * FROM foo; SELEC * FROM bar",
+ catalog=None,
+ schema="",
+ database=mock_database,
+ )
+
+ assert len(annotations) == 2
+ assert "SELEC" in annotations[0].message
+
+
+def test_multiline_error_reports_correct_line() -> None:
+ mock_database = MagicMock()
+ stderr = (
+ 'error: near "SELEC": syntax error\n'
+ " --> <input>:2:1\n"
+ " |\n"
+ "2 | SELEC * FROM foo\n"
+ " | ^~~~~\n"
+ )
+
+ with (
+ patch(
+ "superset.sql_validators.sqlite.get_binary_path",
return_value="syntaqlite"
+ ),
+ patch(
+ "superset.sql_validators.sqlite.subprocess.run",
+ return_value=_mock_result(returncode=1, stderr=stderr),
+ ),
+ ):
+ annotations = SQLiteSQLValidator.validate(
+ sql="SELECT 1;\nSELEC * FROM foo",
+ catalog=None,
+ schema="",
+ database=mock_database,
+ )
+
+ assert len(annotations) == 1
+ assert annotations[0].line_number == 2
+
+
+def test_empty_sql() -> None:
+ mock_database = MagicMock()
+
+ with (
+ patch(
+ "superset.sql_validators.sqlite.get_binary_path",
return_value="syntaqlite"
+ ),
+ patch(
+ "superset.sql_validators.sqlite.subprocess.run",
+ return_value=_mock_result(returncode=0),
+ ),
+ ):
+ annotations = SQLiteSQLValidator.validate(
+ sql="",
+ catalog=None,
+ schema="",
+ database=mock_database,
+ )
+
+ assert annotations == []
+
+
+def test_valid_complex_query() -> None:
+ mock_database = MagicMock()
+
+ with (
+ patch(
+ "superset.sql_validators.sqlite.get_binary_path",
return_value="syntaqlite"
+ ),
+ patch(
+ "superset.sql_validators.sqlite.subprocess.run",
+ return_value=_mock_result(returncode=0),
+ ),
+ ):
+ annotations = SQLiteSQLValidator.validate(
+ sql=(
+ "SELECT a, COUNT(*) AS cnt "
+ "FROM my_table "
+ "WHERE b > 10 "
+ "GROUP BY a "
+ "HAVING cnt > 1 "
+ "ORDER BY cnt DESC "
+ "LIMIT 100"
+ ),
+ catalog=None,
+ schema="",
+ database=mock_database,
+ )
+
+ assert annotations == []
+
+
+def test_annotation_to_dict() -> None:
+ mock_database = MagicMock()
+ stderr = (
+ 'error: near "SELEC": syntax error\n'
+ " --> <input>:1:1\n"
+ " |\n"
+ "1 | SELEC * FROM foo\n"
+ " | ^~~~~\n"
+ )
+
+ with (
+ patch(
+ "superset.sql_validators.sqlite.get_binary_path",
return_value="syntaqlite"
+ ),
+ patch(
+ "superset.sql_validators.sqlite.subprocess.run",
+ return_value=_mock_result(returncode=1, stderr=stderr),
+ ),
+ ):
+ annotations = SQLiteSQLValidator.validate(
+ sql="SELEC * FROM foo",
+ catalog=None,
+ schema="",
+ database=mock_database,
+ )
+
+ assert len(annotations) == 1
+ result = annotations[0].to_dict()
+ assert "line_number" in result
+ assert "start_column" in result
+ assert "end_column" in result
+ assert "message" in result
+
+
+def test_missing_syntaqlite_raises_import_error() -> None:
+ mock_database = MagicMock()
+ with patch("superset.sql_validators.sqlite.get_binary_path", None):
+ with pytest.raises(ImportError, match="syntaqlite is not installed"):
+ SQLiteSQLValidator.validate(
+ sql="SELECT 1",
+ catalog=None,
+ schema="",
+ database=mock_database,
+ )
Review Comment:
This test codifies the current behavior of raising `ImportError` when
`syntaqlite` is missing. If the validator is updated to return an annotation
instead (so the validate_sql endpoint can still return a `result` list), this
test should be updated accordingly to assert on the returned annotation message
rather than an exception.
##########
superset/sql_validators/sqlite.py:
##########
@@ -0,0 +1,129 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import logging
+import re
+import subprocess
+
+from superset.models.core import Database
+from superset.sql_validators.base import BaseSQLValidator,
SQLValidationAnnotation
+
+try:
+ from syntaqlite import get_binary_path
+except ModuleNotFoundError:
+ get_binary_path = None
+
+logger = logging.getLogger(__name__)
+
+DIAGNOSTIC_RE = re.compile(
+ r"^(?:error|warning): (.+)\n"
+ r" --> .+?:(\d+):(\d+)\n"
+ r" +\|\n"
+ r"\d+ \| .+\n"
+ r" +\| +\^(~*)",
+ re.MULTILINE,
+)
+
+
+class SQLiteSQLValidator(BaseSQLValidator): # pylint:
disable=too-few-public-methods
+ """Validate SQL queries using the syntaqlite binary"""
+
+ name = "SQLiteSQLValidator"
+
+ @classmethod
+ def validate(
+ cls,
+ sql: str,
+ catalog: str | None,
+ schema: str | None,
+ database: Database,
+ ) -> list[SQLValidationAnnotation]:
+ annotations: list[SQLValidationAnnotation] = []
+
+ if get_binary_path is None:
+ raise ImportError(
+ "syntaqlite is not installed. Install it with: "
+ 'pip install "apache-superset[sqlite]"'
+ )
Review Comment:
Raising `ImportError` when `syntaqlite` isn’t installed will cause the
validate_sql endpoint to return an error response (not an annotations list) if
this validator is configured, which prevents SQL Lab from showing a targeted
validation message. Consider returning a single `SQLValidationAnnotation` with
a clear install instruction (similar to the FileNotFoundError case) so the API
contract remains `list[SQLValidationAnnotation]` even when the dependency is
missing.
##########
superset/sql_validators/sqlite.py:
##########
@@ -0,0 +1,129 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import logging
+import re
+import subprocess
+
+from superset.models.core import Database
+from superset.sql_validators.base import BaseSQLValidator,
SQLValidationAnnotation
+
+try:
+ from syntaqlite import get_binary_path
+except ModuleNotFoundError:
+ get_binary_path = None
+
+logger = logging.getLogger(__name__)
+
+DIAGNOSTIC_RE = re.compile(
+ r"^(?:error|warning): (.+)\n"
+ r" --> .+?:(\d+):(\d+)\n"
+ r" +\|\n"
+ r"\d+ \| .+\n"
+ r" +\| +\^(~*)",
+ re.MULTILINE,
+)
+
+
+class SQLiteSQLValidator(BaseSQLValidator): # pylint:
disable=too-few-public-methods
+ """Validate SQL queries using the syntaqlite binary"""
+
+ name = "SQLiteSQLValidator"
+
+ @classmethod
+ def validate(
+ cls,
+ sql: str,
+ catalog: str | None,
+ schema: str | None,
+ database: Database,
+ ) -> list[SQLValidationAnnotation]:
+ annotations: list[SQLValidationAnnotation] = []
+
+ if get_binary_path is None:
+ raise ImportError(
+ "syntaqlite is not installed. Install it with: "
+ 'pip install "apache-superset[sqlite]"'
+ )
+
+ try:
+ result = subprocess.run( # noqa: S603
+ [
+ get_binary_path(),
+ "--no-config",
+ "validate",
+ "--allow",
+ "schema",
+ ],
+ input=sql,
+ capture_output=True,
+ text=True,
+ timeout=10,
+ )
+ except FileNotFoundError:
+ logger.exception("syntaqlite binary not found")
+ annotations.append(
+ SQLValidationAnnotation(
+ message=(
+ "syntaqlite binary not found. Ensure it is correctly
installed "
+ 'via "pip install \\"apache-superset[sqlite]\\"" and
available '
+ "on the system PATH."
+ ),
+ line_number=None,
+ start_column=None,
+ end_column=None,
+ )
+ )
+ return annotations
+ except subprocess.TimeoutExpired:
+ logger.warning("syntaqlite timed out validating SQL")
+ return annotations
+
+ if result.returncode == 0:
+ return annotations
+
+ output = (result.stderr or result.stdout).replace("\r\n", "\n")
+ for match in DIAGNOSTIC_RE.finditer(output):
+ message = match.group(1)
+ line_number = int(match.group(2))
+ start_column = int(match.group(3))
+ # The caret (^) plus tildes (~) span the error token
+ end_column = start_column + 1 + len(match.group(4))
Review Comment:
`end_column` is computed as `start_column + 1 + len(~)` which makes it an
exclusive end position. Elsewhere in the codebase (eg expression validation)
`end_column` is treated as an inclusive column index, so this will
over-highlight by 1 character. Consider using `start_column + len(~)` for an
inclusive end (caret plus tildes).
##########
superset/sql_validators/sqlite.py:
##########
@@ -0,0 +1,129 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import logging
+import re
+import subprocess
+
+from superset.models.core import Database
+from superset.sql_validators.base import BaseSQLValidator,
SQLValidationAnnotation
+
+try:
+ from syntaqlite import get_binary_path
+except ModuleNotFoundError:
+ get_binary_path = None
+
+logger = logging.getLogger(__name__)
+
+DIAGNOSTIC_RE = re.compile(
+ r"^(?:error|warning): (.+)\n"
+ r" --> .+?:(\d+):(\d+)\n"
+ r" +\|\n"
+ r"\d+ \| .+\n"
+ r" +\| +\^(~*)",
+ re.MULTILINE,
+)
+
+
+class SQLiteSQLValidator(BaseSQLValidator): # pylint:
disable=too-few-public-methods
+ """Validate SQL queries using the syntaqlite binary"""
+
+ name = "SQLiteSQLValidator"
+
+ @classmethod
+ def validate(
+ cls,
+ sql: str,
+ catalog: str | None,
+ schema: str | None,
+ database: Database,
+ ) -> list[SQLValidationAnnotation]:
+ annotations: list[SQLValidationAnnotation] = []
+
+ if get_binary_path is None:
+ raise ImportError(
+ "syntaqlite is not installed. Install it with: "
+ 'pip install "apache-superset[sqlite]"'
+ )
+
+ try:
+ result = subprocess.run( # noqa: S603
+ [
+ get_binary_path(),
+ "--no-config",
+ "validate",
+ "--allow",
+ "schema",
+ ],
+ input=sql,
+ capture_output=True,
+ text=True,
+ timeout=10,
+ )
+ except FileNotFoundError:
+ logger.exception("syntaqlite binary not found")
+ annotations.append(
+ SQLValidationAnnotation(
+ message=(
+ "syntaqlite binary not found. Ensure it is correctly
installed "
+ 'via "pip install \\"apache-superset[sqlite]\\"" and
available '
+ "on the system PATH."
+ ),
+ line_number=None,
+ start_column=None,
+ end_column=None,
+ )
+ )
+ return annotations
+ except subprocess.TimeoutExpired:
+ logger.warning("syntaqlite timed out validating SQL")
Review Comment:
On `subprocess.TimeoutExpired` the validator logs a warning and returns an
empty annotations list, which will be interpreted as “valid SQL” even though
validation never completed. Returning a non-location annotation (or raising a
dedicated validation error) would avoid false negatives and make the failure
visible to users.
##########
superset/sql_validators/sqlite.py:
##########
@@ -0,0 +1,129 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import logging
+import re
+import subprocess
+
+from superset.models.core import Database
+from superset.sql_validators.base import BaseSQLValidator,
SQLValidationAnnotation
+
+try:
+ from syntaqlite import get_binary_path
+except ModuleNotFoundError:
+ get_binary_path = None
+
+logger = logging.getLogger(__name__)
+
+DIAGNOSTIC_RE = re.compile(
+ r"^(?:error|warning): (.+)\n"
+ r" --> .+?:(\d+):(\d+)\n"
+ r" +\|\n"
+ r"\d+ \| .+\n"
+ r" +\| +\^(~*)",
+ re.MULTILINE,
+)
+
+
+class SQLiteSQLValidator(BaseSQLValidator): # pylint:
disable=too-few-public-methods
+ """Validate SQL queries using the syntaqlite binary"""
+
+ name = "SQLiteSQLValidator"
+
+ @classmethod
+ def validate(
+ cls,
+ sql: str,
+ catalog: str | None,
+ schema: str | None,
+ database: Database,
+ ) -> list[SQLValidationAnnotation]:
+ annotations: list[SQLValidationAnnotation] = []
+
+ if get_binary_path is None:
+ raise ImportError(
+ "syntaqlite is not installed. Install it with: "
+ 'pip install "apache-superset[sqlite]"'
+ )
+
+ try:
+ result = subprocess.run( # noqa: S603
+ [
+ get_binary_path(),
+ "--no-config",
+ "validate",
+ "--allow",
+ "schema",
+ ],
+ input=sql,
+ capture_output=True,
+ text=True,
+ timeout=10,
+ )
+ except FileNotFoundError:
+ logger.exception("syntaqlite binary not found")
+ annotations.append(
Review Comment:
`logger.exception` will emit a full traceback for the common/expected case
where the syntaqlite binary isn’t present. Using
`logger.warning`/`logger.error` without a traceback (and optionally including
the resolved binary path) would reduce log noise while still surfacing the
issue.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]