This is an automated email from the ASF dual-hosted git repository. maximebeauchemin pushed a commit to branch refactor_json in repository https://gitbox.apache.org/repos/asf/superset.git
commit d2d86ade48ba7b46249ed426fced3573f8158dad Author: Maxime Beauchemin <[email protected]> AuthorDate: Mon Apr 29 12:14:01 2024 -0700 refactor json utilities --- superset/async_events/async_query_manager.py | 4 +- superset/charts/data/api.py | 2 +- superset/utils/core.py | 143 +-------------------------- superset/views/base.py | 9 +- superset/views/core.py | 9 +- superset/views/dashboard/views.py | 5 +- 6 files changed, 17 insertions(+), 155 deletions(-) diff --git a/superset/async_events/async_query_manager.py b/superset/async_events/async_query_manager.py index 32cf247cf3..c45af98b99 100644 --- a/superset/async_events/async_query_manager.py +++ b/superset/async_events/async_query_manager.py @@ -14,7 +14,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import json import logging import uuid from typing import Any, Literal, Optional @@ -24,6 +23,7 @@ import redis from flask import Flask, request, Request, Response, session from superset.utils.core import get_user_id +from superset.utils.json import json_dumps logger = logging.getLogger(__name__) @@ -245,7 +245,7 @@ class AsyncQueryManager: raise AsyncQueryJobException("No job ID specified") updates = {"status": status, **kwargs} - event_data = {"data": json.dumps({**job_metadata, **updates})} + event_data = {"data": json_dumps({**job_metadata, **updates})} full_stream_name = f"{self._stream_prefix}full" scoped_stream_name = f"{self._stream_prefix}{job_metadata['channel_id']}" diff --git a/superset/charts/data/api.py b/superset/charts/data/api.py index 2e46eb2737..9830be05bf 100644 --- a/superset/charts/data/api.py +++ b/superset/charts/data/api.py @@ -51,8 +51,8 @@ from superset.utils.core import ( create_zip, DatasourceType, get_user_id, - json_int_dttm_ser, ) +from superset.utils.json import json_int_dttm_ser from superset.utils.decorators import logs_context from superset.views.base import CsvResponse, generate_download_headers, XlsxResponse from superset.views.base_api import statsd_metrics diff --git a/superset/utils/core.py b/superset/utils/core.py index f02b004432..85b6862b8c 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -21,9 +21,7 @@ from __future__ import annotations import _thread import collections -import decimal import errno -import json import logging import os import platform @@ -40,7 +38,7 @@ import zlib from collections.abc import Iterable, Iterator, Sequence from contextlib import closing, contextmanager from dataclasses import dataclass -from datetime import date, datetime, time, timedelta +from datetime import timedelta from email.mime.application import MIMEApplication from email.mime.image import MIMEImage from email.mime.multipart import MIMEMultipart @@ -56,7 +54,6 @@ from zipfile import ZipFile import markdown as md import nh3 -import numpy as np import pandas as pd import sqlalchemy as sa from cryptography.hazmat.backends import default_backend @@ -65,7 +62,6 @@ from flask import current_app, g, request from flask_appbuilder import SQLA from flask_appbuilder.security.sqla.models import User from flask_babel import gettext as __ -from flask_babel.speaklater import LazyString from markupsafe import Markup from pandas.api.types import infer_dtype from pandas.core.dtypes.common import is_numeric_dtype @@ -103,7 +99,6 @@ from superset.superset_typing import ( from superset.utils.backports import StrEnum from superset.utils.database import get_example_database from superset.utils.date_parser import parse_human_timedelta -from superset.utils.dates import datetime_to_epoch, EPOCH from superset.utils.hashing import md5_sha_from_dict, md5_sha_from_str if TYPE_CHECKING: @@ -418,133 +413,6 @@ def cast_to_boolean(value: Any) -> bool | None: return False -class DashboardEncoder(json.JSONEncoder): - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - self.sort_keys = True - - def default(self, o: Any) -> dict[Any, Any] | str: - if isinstance(o, uuid.UUID): - return str(o) - try: - vals = {k: v for k, v in o.__dict__.items() if k != "_sa_instance_state"} - return {f"__{o.__class__.__name__}__": vals} - except Exception: # pylint: disable=broad-except - if isinstance(o, datetime): - return {"__datetime__": o.replace(microsecond=0).isoformat()} - return json.JSONEncoder(sort_keys=True).default(o) - - -def format_timedelta(time_delta: timedelta) -> str: - """ - Ensures negative time deltas are easily interpreted by humans - - >>> td = timedelta(0) - timedelta(days=1, hours=5,minutes=6) - >>> str(td) - '-2 days, 18:54:00' - >>> format_timedelta(td) - '-1 day, 5:06:00' - """ - if time_delta < timedelta(0): - return "-" + str(abs(time_delta)) - - # Change this to format positive time deltas the way you want - return str(time_delta) - - -def base_json_conv(obj: Any) -> Any: - """ - Tries to convert additional types to JSON compatible forms. - - :param obj: The serializable object - :returns: The JSON compatible form - :raises TypeError: If the object cannot be serialized - :see: https://docs.python.org/3/library/json.html#encoders-and-decoders - """ - - if isinstance(obj, memoryview): - obj = obj.tobytes() - if isinstance(obj, np.int64): - return int(obj) - if isinstance(obj, np.bool_): - return bool(obj) - if isinstance(obj, np.ndarray): - return obj.tolist() - if isinstance(obj, set): - return list(obj) - if isinstance(obj, decimal.Decimal): - return float(obj) - if isinstance(obj, (uuid.UUID, time, LazyString)): - return str(obj) - if isinstance(obj, timedelta): - return format_timedelta(obj) - if isinstance(obj, bytes): - try: - return obj.decode("utf-8") - except Exception: # pylint: disable=broad-except - return "[bytes]" - - raise TypeError(f"Unserializable object {obj} of type {type(obj)}") - - -def json_iso_dttm_ser(obj: Any, pessimistic: bool = False) -> Any: - """ - A JSON serializer that deals with dates by serializing them to ISO 8601. - - >>> json.dumps({'dttm': datetime(1970, 1, 1)}, default=json_iso_dttm_ser) - '{"dttm": "1970-01-01T00:00:00"}' - - :param obj: The serializable object - :param pessimistic: Whether to be pessimistic regarding serialization - :returns: The JSON compatible form - :raises TypeError: If the non-pessimistic object cannot be serialized - """ - - if isinstance(obj, (datetime, date, pd.Timestamp)): - return obj.isoformat() - - try: - return base_json_conv(obj) - except TypeError as ex: - if pessimistic: - return f"Unserializable [{type(obj)}]" - - raise ex - - -def pessimistic_json_iso_dttm_ser(obj: Any) -> Any: - """Proxy to call json_iso_dttm_ser in a pessimistic way - - If one of object is not serializable to json, it will still succeed""" - return json_iso_dttm_ser(obj, pessimistic=True) - - -def json_int_dttm_ser(obj: Any) -> Any: - """ - A JSON serializer that deals with dates by serializing them to EPOCH. - - >>> json.dumps({'dttm': datetime(1970, 1, 1)}, default=json_int_dttm_ser) - '{"dttm": 0.0}' - - :param obj: The serializable object - :returns: The JSON compatible form - :raises TypeError: If the object cannot be serialized - """ - - if isinstance(obj, (datetime, pd.Timestamp)): - return datetime_to_epoch(obj) - - if isinstance(obj, date): - return (obj - EPOCH.date()).total_seconds() * 1000 - - return base_json_conv(obj) - - -def json_dumps_w_dates(payload: dict[Any, Any], sort_keys: bool = False) -> str: - """Dumps payload to JSON with Datetime objects properly converted""" - return json.dumps(payload, default=json_int_dttm_ser, sort_keys=sort_keys) - - def error_msg_from_exception(ex: Exception) -> str: """Translate exception into error message @@ -686,15 +554,6 @@ def get_datasource_full_name( return f"[{database_name}].[{schema}].[{datasource_name}]" -def validate_json(obj: bytes | bytearray | str) -> None: - if obj: - try: - json.loads(obj) - except Exception as ex: - logger.error("JSON is not valid %s", str(ex), exc_info=True) - raise SupersetException("JSON is not valid") from ex - - class SigalrmTimeout: """ To be used in a ``with`` block and timeout its content. diff --git a/superset/views/base.py b/superset/views/base.py index 1df5b6a665..18e43ab4b3 100644 --- a/superset/views/base.py +++ b/superset/views/base.py @@ -80,6 +80,7 @@ from superset.reports.models import ReportRecipientType from superset.superset_typing import FlaskResponse from superset.translations.utils import get_language_pack from superset.utils import core as utils +from superset.utils import json as json_utils from superset.utils.filters import get_dataset_access_filters from .utils import bootstrap_user_data @@ -149,7 +150,7 @@ def json_error_response( payload = payload or {"error": f"{msg}"} return Response( - json.dumps(payload, default=utils.json_iso_dttm_ser, ignore_nan=True), + json.dumps(payload, default=json_utils.json_iso_dttm_ser, ignore_nan=True), status=status, mimetype="application/json", ) @@ -164,7 +165,7 @@ def json_errors_response( payload["errors"] = [dataclasses.asdict(error) for error in errors] return Response( - json.dumps(payload, default=utils.json_iso_dttm_ser, ignore_nan=True), + json.dumps(payload, default=json_utils.json_iso_dttm_ser, ignore_nan=True), status=status, mimetype="application/json; charset=utf-8", ) @@ -286,7 +287,7 @@ class BaseSupersetView(BaseView): @staticmethod def json_response(obj: Any, status: int = 200) -> FlaskResponse: return Response( - json.dumps(obj, default=utils.json_int_dttm_ser, ignore_nan=True), + json.dumps(obj, default=json_utils.json_int_dttm_ser, ignore_nan=True), status=status, mimetype="application/json", ) @@ -303,7 +304,7 @@ class BaseSupersetView(BaseView): "superset/spa.html", entry="spa", bootstrap_data=json.dumps( - payload, default=utils.pessimistic_json_iso_dttm_ser + payload, default=json_utils.pessimistic_json_iso_dttm_ser ), ) diff --git a/superset/views/core.py b/superset/views/core.py index 35ba3d0428..ace8dd47c3 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -72,13 +72,14 @@ from superset.models.sql_lab import Query from superset.models.user_attributes import UserAttribute from superset.superset_typing import FlaskResponse from superset.utils import core as utils +from superset.utils import json as json_utils from superset.utils.cache import etag_cache from superset.utils.core import ( - base_json_conv, DatasourceType, get_user_id, ReservedUrlParameters, ) +from superset.utils.json import base_json_conv from superset.views.base import ( api, BaseSupersetView, @@ -577,7 +578,7 @@ class Superset(BaseSupersetView): return self.render_template( "superset/basic.html", bootstrap_data=json.dumps( - bootstrap_data, default=utils.pessimistic_json_iso_dttm_ser + bootstrap_data, default=json_utils.pessimistic_json_iso_dttm_ser ), entry="explore", title=title, @@ -819,7 +820,7 @@ class Superset(BaseSupersetView): "user": bootstrap_user_data(g.user, include_perms=True), "common": common_bootstrap_payload(), }, - default=utils.pessimistic_json_iso_dttm_ser, + default=json_utils.pessimistic_json_iso_dttm_ser, ), standalone_mode=ReservedUrlParameters.is_standalone_mode(), ) @@ -920,7 +921,7 @@ class Superset(BaseSupersetView): "superset/spa.html", entry="spa", bootstrap_data=json.dumps( - payload, default=utils.pessimistic_json_iso_dttm_ser + payload, default=json_utils.pessimistic_json_iso_dttm_ser ), ) diff --git a/superset/views/dashboard/views.py b/superset/views/dashboard/views.py index 33252084ad..3461c1ff57 100644 --- a/superset/views/dashboard/views.py +++ b/superset/views/dashboard/views.py @@ -32,6 +32,7 @@ from superset.constants import MODEL_VIEW_RW_METHOD_PERMISSION_MAP, RouteMethod from superset.models.dashboard import Dashboard as DashboardModel from superset.superset_typing import FlaskResponse from superset.utils import core as utils +from superset.utils import json as json_utils from superset.views.base import ( BaseSupersetView, common_bootstrap_payload, @@ -94,8 +95,8 @@ class DashboardModelView(DashboardMixin, SupersetModelView, DeleteMixin): # pyl item.slug = re.sub(r"[^\w\-]+", "", item.slug) if g.user not in item.owners: item.owners.append(g.user) - utils.validate_json(item.json_metadata) - utils.validate_json(item.position_json) + json_utils.validate_json(item.json_metadata) + json_utils.validate_json(item.position_json) for slc in item.slices: slc.owners = list(set(item.owners) | set(slc.owners))
