This is an automated email from the ASF dual-hosted git repository.
jli pushed a commit to branch 4.1
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/4.1 by this push:
new 41bcc9d0a6 fix: cache-warmup fails (#31173)
41bcc9d0a6 is described below
commit 41bcc9d0a6150ddb5081b208d378d0736c5e2b88
Author: nsivarajan <[email protected]>
AuthorDate: Sat Dec 7 07:11:38 2024 +0530
fix: cache-warmup fails (#31173)
Co-authored-by: Sivarajan Narayanan <[email protected]>
Co-authored-by: Pat Heard <[email protected]>
(cherry picked from commit 592564b623a18c5eb553300b63f407e249b744e3)
---
UPDATING.md | 8 ++++-
superset/tasks/cache.py | 9 ++++--
superset/tasks/utils.py | 2 +-
superset/utils/urls.py | 12 ++++++++
tests/integration_tests/tasks/test_cache.py | 46 ++++++++++++++++++++++++-----
tests/integration_tests/tasks/test_utils.py | 19 ++++++++++--
6 files changed, 82 insertions(+), 14 deletions(-)
diff --git a/UPDATING.md b/UPDATING.md
index 6908c1f906..290417fa6c 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -22,6 +22,12 @@ under the License.
This file documents any backwards-incompatible changes in Superset and
assists people when migrating to a new version.
+## 4.1.2
+
+- [31173](https://github.com/apache/superset/pull/31173) Modified
`fetch_csrf_token` to align with HTTP standards, particularly regarding how
cookies are handled. If you encounter any issues related to CSRF functionality,
please report them as a new issue and reference this PR for context.
+
+### Potential Downtime
+
## 4.1.0
- [29274](https://github.com/apache/superset/pull/29274): We made it easier to
trigger CI on your
@@ -229,7 +235,7 @@ assists people when migrating to a new version.
- [19231](https://github.com/apache/superset/pull/19231): The
`ENABLE_REACT_CRUD_VIEWS` feature flag has been removed (permanently enabled).
Any deployments which had set this flag to false will need to verify that the
React views support their use case.
- [19230](https://github.com/apache/superset/pull/19230): The
`ROW_LEVEL_SECURITY` feature flag has been removed (permanently enabled). Any
deployments which had set this flag to false will need to verify that the
presence of the Row Level Security feature does not interfere with their use
case.
- [19168](https://github.com/apache/superset/pull/19168): Celery upgrade to
5.X resulted in breaking changes to its command line invocation.
-html#step-1-adjust-your-command-line-invocation) instructions for adjustments.
Also consider migrating you Celery config per
[here](https://docs.celeryq.dev/en/stable/userguide/configuration.html#conf-old-settings-map).
+ html#step-1-adjust-your-command-line-invocation) instructions for
adjustments. Also consider migrating you Celery config per
[here](https://docs.celeryq.dev/en/stable/userguide/configuration.html#conf-old-settings-map).
- [19142](https://github.com/apache/superset/pull/19142): The
`VERSIONED_EXPORT` config key is now `True` by default.
- [19113](https://github.com/apache/superset/pull/19113): The
`ENABLE_JAVASCRIPT_CONTROLS` config key has moved from an app config to a
feature flag. Any deployments who overrode this setting will now need to
override the feature flag from here onward.
- [19107](https://github.com/apache/superset/pull/19107): The
`SQLLAB_BACKEND_PERSISTENCE` feature flag is now `True` by default, which
enables persisting SQL Lab tabs in the backend instead of the browser's
`localStorage`.
diff --git a/superset/tasks/cache.py b/superset/tasks/cache.py
index 1509b0d062..18ca7d7165 100644
--- a/superset/tasks/cache.py
+++ b/superset/tasks/cache.py
@@ -33,7 +33,7 @@ from superset.tasks.utils import fetch_csrf_token
from superset.utils import json
from superset.utils.date_parser import parse_human_datetime
from superset.utils.machine_auth import MachineAuthProvider
-from superset.utils.urls import get_url_path
+from superset.utils.urls import get_url_path, is_secure_url
logger = get_task_logger(__name__)
logger.setLevel(logging.INFO)
@@ -220,10 +220,15 @@ def fetch_url(data: str, headers: dict[str, str]) ->
dict[str, str]:
"""
result = {}
try:
+ url = get_url_path("ChartRestApi.warm_up_cache")
+
+ if is_secure_url(url):
+ logger.info("URL '%s' is secure. Adding Referer header.", url)
+ headers.update({"Referer": url})
+
# Fetch CSRF token for API request
headers.update(fetch_csrf_token(headers))
- url = get_url_path("ChartRestApi.warm_up_cache")
logger.info("Fetching %s with payload %s", url, data)
req = request.Request(
url, data=bytes(data, "utf-8"), headers=headers, method="PUT"
diff --git a/superset/tasks/utils.py b/superset/tasks/utils.py
index 6fc799c4ab..5e3bc14808 100644
--- a/superset/tasks/utils.py
+++ b/superset/tasks/utils.py
@@ -133,7 +133,7 @@ def fetch_csrf_token(
data = json.loads(body)
res = {"X-CSRF-Token": data["result"]}
if session_cookie is not None:
- res["Cookie"] = session_cookie
+ res["Cookie"] = f"{session_cookie_name}={session_cookie}"
return res
logger.error("Error fetching CSRF token, status code: %s", response.status)
diff --git a/superset/utils/urls.py b/superset/utils/urls.py
index 57a1b63dd4..9b186f54f3 100644
--- a/superset/utils/urls.py
+++ b/superset/utils/urls.py
@@ -16,6 +16,7 @@
# under the License.
import urllib
from typing import Any
+from urllib.parse import urlparse
from flask import current_app, url_for
@@ -50,3 +51,14 @@ def modify_url_query(url: str, **kwargs: Any) -> str:
f"{k}={urllib.parse.quote(str(v[0]))}" for k, v in params.items()
)
return urllib.parse.urlunsplit(parts)
+
+
+def is_secure_url(url: str) -> bool:
+ """
+ Validates if a URL is secure (uses HTTPS).
+
+ :param url: The URL to validate.
+ :return: True if the URL uses HTTPS (secure), False if it uses HTTP
(non-secure).
+ """
+ parsed_url = urlparse(url)
+ return parsed_url.scheme == "https"
diff --git a/tests/integration_tests/tasks/test_cache.py
b/tests/integration_tests/tasks/test_cache.py
index 6e8d3ffe03..368cb1ebf0 100644
--- a/tests/integration_tests/tasks/test_cache.py
+++ b/tests/integration_tests/tasks/test_cache.py
@@ -22,17 +22,32 @@ from tests.integration_tests.test_app import app
@pytest.mark.parametrize(
- "base_url",
+ "base_url, expected_referer",
[
- "http://base-url",
- "http://base-url/",
+ ("http://base-url", None),
+ ("http://base-url/", None),
+ ("https://base-url", "https://base-url/api/v1/chart/warm_up_cache"),
+ ("https://base-url/", "https://base-url/api/v1/chart/warm_up_cache"),
+ ],
+ ids=[
+ "Without trailing slash (HTTP)",
+ "With trailing slash (HTTP)",
+ "Without trailing slash (HTTPS)",
+ "With trailing slash (HTTPS)",
],
- ids=["Without trailing slash", "With trailing slash"],
)
@mock.patch("superset.tasks.cache.fetch_csrf_token")
@mock.patch("superset.tasks.cache.request.Request")
@mock.patch("superset.tasks.cache.request.urlopen")
-def test_fetch_url(mock_urlopen, mock_request_cls, mock_fetch_csrf_token,
base_url):
[email protected]("superset.tasks.cache.is_secure_url")
+def test_fetch_url(
+ mock_is_secure_url,
+ mock_urlopen,
+ mock_request_cls,
+ mock_fetch_csrf_token,
+ base_url,
+ expected_referer,
+):
from superset.tasks.cache import fetch_url
mock_request = mock.MagicMock()
@@ -41,8 +56,17 @@ def test_fetch_url(mock_urlopen, mock_request_cls,
mock_fetch_csrf_token, base_u
mock_urlopen.return_value = mock.MagicMock()
mock_urlopen.return_value.code = 200
+ # Mock the URL validation to return True for HTTPS and False for HTTP
+ mock_is_secure_url.return_value = base_url.startswith("https")
+
initial_headers = {"Cookie": "cookie", "key": "value"}
csrf_headers = initial_headers | {"X-CSRF-Token": "csrf_token"}
+
+ # Conditionally add the Referer header and assert its presence
+ if expected_referer:
+ csrf_headers = csrf_headers | {"Referer": expected_referer}
+ assert csrf_headers["Referer"] == expected_referer
+
mock_fetch_csrf_token.return_value = csrf_headers
app.config["WEBDRIVER_BASEURL"] = base_url
@@ -51,13 +75,21 @@ def test_fetch_url(mock_urlopen, mock_request_cls,
mock_fetch_csrf_token, base_u
result = fetch_url(data, initial_headers)
- assert data == result["success"]
+ expected_url = (
+ f"{base_url}/api/v1/chart/warm_up_cache"
+ if not base_url.endswith("/")
+ else f"{base_url}api/v1/chart/warm_up_cache"
+ )
+
mock_fetch_csrf_token.assert_called_once_with(initial_headers)
+
mock_request_cls.assert_called_once_with(
- "http://base-url/api/v1/chart/warm_up_cache",
+ expected_url, # Use the dynamic URL based on base_url
data=data_encoded,
headers=csrf_headers,
method="PUT",
)
# assert the same Request object is used
mock_urlopen.assert_called_once_with(mock_request, timeout=mock.ANY)
+
+ assert data == result["success"]
diff --git a/tests/integration_tests/tasks/test_utils.py
b/tests/integration_tests/tasks/test_utils.py
index b1213b78c8..29e5f38319 100644
--- a/tests/integration_tests/tasks/test_utils.py
+++ b/tests/integration_tests/tasks/test_utils.py
@@ -26,8 +26,15 @@ from tests.integration_tests.test_app import app
[
"http://base-url",
"http://base-url/",
+ "https://base-url",
+ "https://base-url/",
+ ],
+ ids=[
+ "Without trailing slash (HTTP)",
+ "With trailing slash (HTTP)",
+ "Without trailing slash (HTTPS)",
+ "With trailing slash (HTTPS)",
],
- ids=["Without trailing slash", "With trailing slash"],
)
@mock.patch("superset.tasks.cache.request.Request")
@mock.patch("superset.tasks.cache.request.urlopen")
@@ -52,13 +59,19 @@ def test_fetch_csrf_token(mock_urlopen, mock_request_cls,
base_url, app_context)
result_headers = fetch_csrf_token(headers)
+ expected_url = (
+ f"{base_url}/api/v1/security/csrf_token/"
+ if not base_url.endswith("/")
+ else f"{base_url}api/v1/security/csrf_token/"
+ )
+
mock_request_cls.assert_called_with(
- "http://base-url/api/v1/security/csrf_token/",
+ expected_url,
headers=headers,
method="GET",
)
assert result_headers["X-CSRF-Token"] == "csrf_token"
- assert result_headers["Cookie"] == "new_session_cookie"
+ assert result_headers["Cookie"] == "session=new_session_cookie" # Updated
assertion
# assert the same Request object is used
mock_urlopen.assert_called_once_with(mock_request, timeout=mock.ANY)