dosubot[bot] commented on issue #30381:
URL: https://github.com/apache/superset/issues/30381#issuecomment-2372517537

   <!-- Answer -->
   Certainly! Here is an example of a full `superset_config.py` with HTML 
sanitization configured:
   
   ```python
   # Licensed to the Apache Software Foundation (ASF) under one
   # or more contributor license agreements.  See the NOTICE file
   # distributed with this work for additional information
   # regarding copyright ownership.  The ASF licenses this file
   # to you under the Apache License, Version 2.0 (the
   # "License"); you may not use this file except in compliance
   # with the License.  You may obtain a copy of the License at
   #
   #   http://www.apache.org/licenses/LICENSE-2.0
   #
   # Unless required by applicable law or agreed to in writing,
   # software distributed under the License is distributed on an
   # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   # KIND, either express or implied.  See the License for the
   # specific language governing permissions and limitations
   # under the License.
   """The main config file for Superset
   
   All configuration in this file can be overridden by providing a
   superset_config in your PYTHONPATH as there is a
   ``from superset_config import *`` at the end of this file.
   """
   
   # mypy: ignore-errors
   # pylint: disable=too-many-lines
   from __future__ import annotations
   
   import imp  # pylint: disable=deprecated-module
   import importlib.util
   import json
   import logging
   import os
   import re
   import sys
   from collections import OrderedDict
   from contextlib import contextmanager
   from datetime import timedelta
   from email.mime.multipart import MIMEMultipart
   from importlib.resources import files
   from typing import Any, Callable, Iterator, Literal, TYPE_CHECKING, TypedDict
   
   import click
   import pkg_resources
   from celery.schedules import crontab
   from flask import Blueprint
   from flask_appbuilder.security.manager import AUTH_DB
   from flask_caching.backends.base import BaseCache
   from pandas import Series
   from pandas._libs.parsers import STR_NA_VALUES
   from sqlalchemy.engine.url import URL
   from sqlalchemy.orm.query import Query
   
   from superset.advanced_data_type.plugins.internet_address import internet_address
   from superset.advanced_data_type.plugins.internet_port import internet_port
   from superset.advanced_data_type.types import AdvancedDataType
   from superset.constants import CHANGE_ME_SECRET_KEY
   from superset.jinja_context import BaseTemplateProcessor
   from superset.key_value.types import JsonKeyValueCodec
   from superset.stats_logger import DummyStatsLogger
   from superset.superset_typing import CacheConfig
   from superset.tasks.types import ExecutorType
   from superset.utils import core as utils
   from superset.utils.core import is_test, NO_TIME_RANGE, parse_boolean_string
   from superset.utils.encrypt import SQLAlchemyUtilsAdapter
   from superset.utils.log import DBEventLogger
   from superset.utils.logging_configurator import DefaultLoggingConfigurator
   
   logger = logging.getLogger(__name__)
   
   if TYPE_CHECKING:
       from flask_appbuilder.security.sqla import models
   
       from superset.connectors.sqla.models import SqlaTable
       from superset.models.core import Database
       from superset.models.dashboard import Dashboard
       from superset.models.slice import Slice
   
   # Realtime stats logger, a StatsD implementation exists
   STATS_LOGGER = DummyStatsLogger()
   
   # By default will log events to the metadata database with `DBEventLogger`
   # Note that you can use `StdOutEventLogger` for debugging
   # Note that you can write your own event logger by extending
   # `AbstractEventLogger`
   # https://github.com/apache/superset/blob/master/superset/utils/log.py
   EVENT_LOGGER = DBEventLogger()
   
   SUPERSET_LOG_VIEW = True
   
   BASE_DIR = pkg_resources.resource_filename("superset", "")
   if "SUPERSET_HOME" in os.environ:
       DATA_DIR = os.environ["SUPERSET_HOME"]
   else:
       DATA_DIR = os.path.expanduser("~/.superset")
   
   # ---------------------------------------------------------
   # Superset specific config
   # ---------------------------------------------------------
   VERSION_INFO_FILE = str(files("superset") / "static/version_info.json")
   PACKAGE_JSON_FILE = str(files("superset") / "static/assets/package.json")
   # ---------------------------------------------------
   # Image and file configuration
   # ---------------------------------------------------
   # The file upload folder, when using models with files
   UPLOAD_FOLDER = BASE_DIR + "/app/static/uploads/"
   UPLOAD_CHUNK_SIZE = 4096
   
   # The image upload folder, when using models with images
   IMG_UPLOAD_FOLDER = BASE_DIR + "/app/static/uploads/"
   
   # The image upload url, when using models with images
   IMG_UPLOAD_URL = "/static/uploads/"
   # Setup image size default is (300, 200, True)
   # IMG_SIZE = (300, 200, True)
   
   # Default cache timeout, applies to all cache backends unless specifically
   # overridden in each cache config.
   CACHE_DEFAULT_TIMEOUT = int(timedelta(days=1).total_seconds())
   
   # Default cache for Superset objects
   CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "NullCache"}
   
   # Cache for datasource metadata and query results
   DATA_CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "NullCache"}
   
   # Cache for dashboard filter state. `CACHE_TYPE` defaults to
   # `SupersetMetastoreCache` that stores the values in the key-value table in
   # the Superset metastore, as it's required for Superset to operate correctly,
   # but can be replaced by any `Flask-Caching` backend.
   FILTER_STATE_CACHE_CONFIG: CacheConfig = {
       "CACHE_TYPE": "SupersetMetastoreCache",
       "CACHE_DEFAULT_TIMEOUT": int(timedelta(days=90).total_seconds()),
       # Should the timeout be reset when retrieving a cached value?
       "REFRESH_TIMEOUT_ON_RETRIEVAL": True,
       # The following parameter only applies to `MetastoreCache`:
       # How should entries be serialized/deserialized?
       "CODEC": JsonKeyValueCodec(),
   }
   
   # Cache for explore form data state. `CACHE_TYPE` defaults to
   # `SupersetMetastoreCache` that stores the values in the key-value table in
   # the Superset metastore, as it's required for Superset to operate correctly,
   # but can be replaced by any `Flask-Caching` backend.
   EXPLORE_FORM_DATA_CACHE_CONFIG: CacheConfig = {
       "CACHE_TYPE": "SupersetMetastoreCache",
       "CACHE_DEFAULT_TIMEOUT": int(timedelta(days=7).total_seconds()),
       # Should the timeout be reset when retrieving a cached value?
       "REFRESH_TIMEOUT_ON_RETRIEVAL": True,
       # The following parameter only applies to `MetastoreCache`:
       # How should entries be serialized/deserialized?
       "CODEC": JsonKeyValueCodec(),
   }
   
   # store cache keys by datasource UID (via CacheKey) for custom
   # processing/invalidation
   STORE_CACHE_KEYS_IN_METADATA_DB = False
   
   # CORS Options
   ENABLE_CORS = False
   CORS_OPTIONS: dict[Any, Any] = {}
   
   # Sanitizes the HTML content used in markdowns to allow its rendering in a
   # safe manner. Disabling this option is not recommended for security reasons.
   # If you wish to allow valid safe elements that are not included in the
   # default sanitization schema, use the HTML_SANITIZATION_SCHEMA_EXTENSIONS
   # configuration.
   HTML_SANITIZATION = True
   
   # Use this configuration to extend the HTML sanitization schema.
   # By default we use the GitHub schema defined in
   # https://github.com/syntax-tree/hast-util-sanitize/blob/main/lib/schema.js
   # For example, the following configuration would allow the rendering of the
   # style attribute for div elements and the ftp protocol in hrefs:
   # HTML_SANITIZATION_SCHEMA_EXTENSIONS = {
   #   "attributes": {
   #     "div": ["style"],
   #   },
   #   "protocols": {
   #     "href": ["ftp"],
   #   }
   # }
   # Be careful when extending the default schema to avoid XSS attacks.
   HTML_SANITIZATION_SCHEMA_EXTENSIONS: dict[str, Any] = {
       "attributes": {
           "*": ["style", "className"],
       },
       "tagNames": ["style"],
   }
   
   # Chrome allows up to 6 open connections per domain at a time. When there are
   # more than 6 slices in a dashboard, fetch requests are often queued up and
   # wait for the next available socket. PR #5039 is trying to allow domain
   # sharding for Superset, and this feature will be enabled by configuration
   # only (by default Superset doesn't allow cross-domain requests).
   SUPERSET_WEBSERVER_DOMAINS = None
   def _try_json_readversion(filepath: str) -> str | None:
       """Return the ``version`` field of the JSON file at *filepath*.

       This is a deliberate best-effort lookup: if the file cannot be opened,
       does not contain valid JSON, or its top-level value has no ``get``
       method (i.e. is not a JSON object), ``None`` is returned instead of
       raising. A missing ``version`` key also yields ``None``.

       :param filepath: path of the JSON file to read
       :returns: the value stored under ``version``, or ``None``
       """
       try:
           # JSON is UTF-8 by specification; do not depend on the locale's
           # default encoding, which differs between platforms.
           with open(filepath, encoding="utf-8") as f:
               return json.load(f).get("version")
       except Exception:  # pylint: disable=broad-except
           return None
   def _try_json_readsha(filepath: str, length: int) -> str | None:
       """Return the first *length* characters of ``GIT_SHA`` from a JSON file.

       This is a deliberate best-effort lookup: if the file cannot be opened,
       does not contain valid JSON, has no ``GIT_SHA`` key, or the stored
       value cannot be sliced, ``None`` is returned instead of raising.

       :param filepath: path of the JSON file to read
       :param length: maximum number of leading characters to keep
       :returns: the truncated ``GIT_SHA`` value, or ``None``
       """
       try:
           # JSON is UTF-8 by specification; do not depend on the locale's
           # default encoding, which differs between platforms.
           with open(filepath, encoding="utf-8") as f:
               sha = json.load(f).get("GIT_SHA")
           # A missing key is an expected outcome; handle it explicitly
           # rather than relying on ``None[:length]`` raising TypeError.
           return None if sha is None else sha[:length]
       except Exception:  # pylint: disable=broad-except
           return None
   
   
   #
   # If True, we will skip the call to load the logger config found in
   # alembic.init
   #
   ALEMBIC_SKIP_LOG_CONFIG = False
   
   # Depending on the context in which this config is loaded, the
   # version_info.json file may or may not be available, as it is
   # generated on install via setup.py. In the event that we're
   # actually running Superset, we will have already installed,
   # therefore it WILL exist. When unit tests are running, however,
   # it WILL NOT exist, so we fall back to reading package.json
   # Prefer version_info.json; fall back to package.json when no version
   # could be read from it.
   VERSION_STRING = _try_json_readversion(VERSION_INFO_FILE) or _try_json_readversion(
       PACKAGE_JSON_FILE
   )
   
   VERSION_SHA_LENGTH = 8
   VERSION_SHA = _try_json_readsha(VERSION_INFO_FILE, VERSION_SHA_LENGTH)
   
   # Build number is shown in the About section if available. This
   # can be replaced at build time to expose build information.
   BUILD_NUMBER = None
   
   # default viz used in chart explorer & SQL Lab explore
   DEFAULT_VIZ_TYPE = "table"
   
   # default row limit when requesting chart data
   ROW_LIMIT = 50000
   # default row limit when requesting samples from datasource in explore view
   SAMPLES_ROW_LIMIT = 1000
   # default row limit for native filters
   NATIVE_FILTER_DEFAULT_ROW_LIMIT = 1000
   # max rows retrieved by filter select auto complete
   FILTER_SELECT_ROW_LIMIT = 10000
   # default time filter in explore
   # values may be "Last day", "Last week", "<ISO date> : now", etc.
   DEFAULT_TIME_FILTER = NO_TIME_RANGE
   
   # This is an important setting, and should be lower than your
   # [load balancer / proxy / envoy / kong / ...] timeout settings.
   # You should also make sure to configure your WSGI server
   # (gunicorn, nginx, apache, ...) timeout setting to be <= to this setting
   SUPERSET_WEBSERVER_TIMEOUT = int(timedelta(minutes=1).total_seconds())
   ```
   
   This configuration includes the `HTML_SANITIZATION` and 
`HTML_SANITIZATION_SCHEMA_EXTENSIONS` settings to allow CSS styles and class 
names in the HTML content 
<sup>[[1]](https://github.com/apache/superset/blob/master/superset/config.py)</sup>.
   
   
   <!-- Dosu Comment Footer -->
   *To continue talking to [Dosu](https://dosu.dev), mention @dosu.*
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to